gcc/input.c

   1 /* Data and functions related to line maps and input files.
   2    Copyright (C) 2004-2018 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "intl.h"
  24 #include "diagnostic-core.h"
  25 #include "selftest.h"
  26 #include "cpplib.h"
  27
  28 #ifndef HAVE_ICONV
  29 #define HAVE_ICONV 0
  30 #endif
  31
  32 /* This is a cache used by get_next_line to store the content of a
  33    file to be searched for file lines.  */
  34 struct fcache
  35 {
  36   /* These are information used to store a line boundary.  */
  37   struct line_info
  38   {
  39     /* The line number.  It starts from 1.  */
  40     size_t line_num;
  41
  42     /* The position (byte count) of the beginning of the line,
  43        relative to the file data pointer.  This starts at zero.  */
  44     size_t start_pos;
  45
  46     /* The position (byte count) of the last byte of the line.  This
  47        normally points to the '\n' character, or to one byte after the
  48        last byte of the file, if the file doesn't contain a '\n'
  49        character.  */
  50     size_t end_pos;
  51
  52     line_info (size_t l, size_t s, size_t e)
  53       : line_num (l), start_pos (s), end_pos (e)
  54     {}
  55
  56     line_info ()
  57       :line_num (0), start_pos (0), end_pos (0)
  58     {}
  59   };
  60
  61   /* The number of time this file has been accessed.  This is used
  62      to designate which file cache to evict from the cache
  63      array.  */
  64   unsigned use_count;
  65
  66   /* The file_path is the key for identifying a particular file in
  67      the cache.
  68      For libcpp-using code, the underlying buffer for this field is
  69      owned by the corresponding _cpp_file within the cpp_reader.  */
  70   const char *file_path;
  71
  72   FILE *fp;
  73
  74   /* This points to the content of the file that we've read so
  75      far.  */
  76   char *data;
  77
  78   /*  The size of the DATA array above.*/
  79   size_t size;
  80
  81   /* The number of bytes read from the underlying file so far.  This
  82      must be less (or equal) than SIZE above.  */
  83   size_t nb_read;
  84
  85   /* The index of the beginning of the current line.  */
  86   size_t line_start_idx;
  87
  88   /* The number of the previous line read.  This starts at 1.  Zero
  89      means we've read no line so far.  */
  90   size_t line_num;
  91
  92   /* This is the total number of lines of the current file.  At the
  93      moment, we try to get this information from the line map
  94      subsystem.  Note that this is just a hint.  When using the C++
  95      front-end, this hint is correct because the input file is then
  96      completely tokenized before parsing starts; so the line map knows
  97      the number of lines before compilation really starts.  For e.g,
  98      the C front-end, it can happen that we start emitting diagnostics
  99      before the line map has seen the end of the file.  */
 100   size_t total_lines;
 101
 102   /* Could this file be missing a trailing newline on its final line?
 103      Initially true (to cope with empty files), set to true/false
 104      as each line is read.  */
 105   bool missing_trailing_newline;
 106
 107   /* This is a record of the beginning and end of the lines we've seen
 108      while reading the file.  This is useful to avoid walking the data
 109      from the beginning when we are asked to read a line that is
 110      before LINE_START_IDX above.  Note that the maximum size of this
 111      record is fcache_line_record_size, so that the memory consumption
 112      doesn't explode.  We thus scale total_lines down to
 113      fcache_line_record_size.  */
 114   vec<line_info, va_heap> line_record;
 115
 116   fcache ();
 117   ~fcache ();
 118 };
 119
/* Current position in real source file.  */

location_t input_location = UNKNOWN_LOCATION;

/* The line map set that the functions in this file pass to the
   linemap_* routines when resolving and expanding locations.  */

struct line_maps *line_table;

/* A stashed copy of "line_table" for use by selftest::line_table_test.
   This needs to be a global so that it can be a GC root, and thus
   prevent the stashed copy from being garbage-collected if the GC runs
   during a line_table_test.  */

struct line_maps *saved_line_table;

/* The array of file caches.  It is allocated on first use by
   diagnostic_file_cache_init and released by
   diagnostic_file_cache_fini.  */
static fcache *fcache_tab;

/* Number of entries in fcache_tab.  */
static const size_t fcache_tab_size = 16;

/* Size, in bytes, given to a cache's data buffer the first time it is
   grown; maybe_grow doubles the buffer on each later growth.  */
static const size_t fcache_buffer_size = 4 * 1024;

/* Maximum number of entries kept in the line_record of an fcache.  */
static const size_t fcache_line_record_size = 100;
 137
 138 /* Expand the source location LOC into a human readable location.  If
 139    LOC resolves to a builtin location, the file name of the readable
 140    location is set to the string "<built-in>". If EXPANSION_POINT_P is
 141    TRUE and LOC is virtual, then it is resolved to the expansion
 142    point of the involved macro.  Otherwise, it is resolved to the
 143    spelling location of the token.
 144
 145    When resolving to the spelling location of the token, if the
 146    resulting location is for a built-in location (that is, it has no
 147    associated line/column) in the context of a macro expansion, the
 148    returned location is the first one (while unwinding the macro
 149    location towards its expansion point) that is in real source
 150    code.
 151
 152    ASPECT controls which part of the location to use.  */
 153
 154 static expanded_location
 155 expand_location_1 (source_location loc,
 156                    bool expansion_point_p,
 157                    enum location_aspect aspect)
 158 {
 159   expanded_location xloc;
 160   const line_map_ordinary *map;
 161   enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
 162   tree block = NULL;
 163
 164   if (IS_ADHOC_LOC (loc))
 165     {
 166       block = LOCATION_BLOCK (loc);
 167       loc = LOCATION_LOCUS (loc);
 168     }
 169
 170   memset (&xloc, 0, sizeof (xloc));
 171
 172   if (loc >= RESERVED_LOCATION_COUNT)
 173     {
 174       if (!expansion_point_p)
 175         {
 176           /* We want to resolve LOC to its spelling location.
 177
 178              But if that spelling location is a reserved location that
 179              appears in the context of a macro expansion (like for a
 180              location for a built-in token), let's consider the first
 181              location (toward the expansion point) that is not reserved;
 182              that is, the first location that is in real source code.  */
 183           loc = linemap_unwind_to_first_non_reserved_loc (line_table,
 184                                                           loc, NULL);
 185           lrk = LRK_SPELLING_LOCATION;
 186         }
 187       loc = linemap_resolve_location (line_table, loc, lrk, &map);
 188
 189       /* loc is now either in an ordinary map, or is a reserved location.
 190          If it is a compound location, the caret is in a spelling location,
 191          but the start/finish might still be a virtual location.
 192          Depending of what the caller asked for, we may need to recurse
 193          one level in order to resolve any virtual locations in the
 194          end-points.  */
 195       switch (aspect)
 196         {
 197         default:
 198           gcc_unreachable ();
 199           /* Fall through.  */
 200         case LOCATION_ASPECT_CARET:
 201           break;
 202         case LOCATION_ASPECT_START:
 203           {
 204             source_location start = get_start (loc);
 205             if (start != loc)
 206               return expand_location_1 (start, expansion_point_p, aspect);
 207           }
 208           break;
 209         case LOCATION_ASPECT_FINISH:
 210           {
 211             source_location finish = get_finish (loc);
 212             if (finish != loc)
 213               return expand_location_1 (finish, expansion_point_p, aspect);
 214           }
 215           break;
 216         }
 217       xloc = linemap_expand_location (line_table, map, loc);
 218     }
 219
 220   xloc.data = block;
 221   if (loc <= BUILTINS_LOCATION)
 222     xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
 223
 224   return xloc;
 225 }
 226
 227 /* Initialize the set of cache used for files accessed by caret
 228    diagnostic.  */
 229
 230 static void
 231 diagnostic_file_cache_init (void)
 232 {
 233   if (fcache_tab == NULL)
 234     fcache_tab = new fcache[fcache_tab_size];
 235 }
 236
 237 /* Free the resources used by the set of cache used for files accessed
 238    by caret diagnostic.  */
 239
 240 void
 241 diagnostic_file_cache_fini (void)
 242 {
 243   if (fcache_tab)
 244     {
 245       delete [] (fcache_tab);
 246       fcache_tab = NULL;
 247     }
 248 }
 249
 250 /* Return the total lines number that have been read so far by the
 251    line map (in the preprocessor) so far.  For languages like C++ that
 252    entirely preprocess the input file before starting to parse, this
 253    equals the actual number of lines of the file.  */
 254
 255 static size_t
 256 total_lines_num (const char *file_path)
 257 {
 258   size_t r = 0;
 259   source_location l = 0;
 260   if (linemap_get_file_highest_location (line_table, file_path, &l))
 261     {
 262       gcc_assert (l >= RESERVED_LOCATION_COUNT);
 263       expanded_location xloc = expand_location (l);
 264       r = xloc.line;
 265     }
 266   return r;
 267 }
 268
 269 /* Lookup the cache used for the content of a given file accessed by
 270    caret diagnostic.  Return the found cached file, or NULL if no
 271    cached file was found.  */
 272
 273 static fcache*
 274 lookup_file_in_cache_tab (const char *file_path)
 275 {
 276   if (file_path == NULL)
 277     return NULL;
 278
 279   diagnostic_file_cache_init ();
 280
 281   /* This will contain the found cached file.  */
 282   fcache *r = NULL;
 283   for (unsigned i = 0; i < fcache_tab_size; ++i)
 284     {
 285       fcache *c = &fcache_tab[i];
 286       if (c->file_path && !strcmp (c->file_path, file_path))
 287         {
 288           ++c->use_count;
 289           r = c;
 290         }
 291     }
 292
 293   if (r)
 294     ++r->use_count;
 295
 296   return r;
 297 }
 298
 299 /* Purge any mention of FILENAME from the cache of files used for
 300    printing source code.  For use in selftests when working
 301    with tempfiles.  */
 302
 303 void
 304 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
 305 {
 306   gcc_assert (file_path);
 307
 308   fcache *r = lookup_file_in_cache_tab (file_path);
 309   if (!r)
 310     /* Not found.  */
 311     return;
 312
 313   r->file_path = NULL;
 314   if (r->fp)
 315     fclose (r->fp);
 316   r->fp = NULL;
 317   r->nb_read = 0;
 318   r->line_start_idx = 0;
 319   r->line_num = 0;
 320   r->line_record.truncate (0);
 321   r->use_count = 0;
 322   r->total_lines = 0;
 323   r->missing_trailing_newline = true;
 324 }
 325
 326 /* Return the file cache that has been less used, recently, or the
 327    first empty one.  If HIGHEST_USE_COUNT is non-null,
 328    *HIGHEST_USE_COUNT is set to the highest use count of the entries
 329    in the cache table.  */
 330
 331 static fcache*
 332 evicted_cache_tab_entry (unsigned *highest_use_count)
 333 {
 334   diagnostic_file_cache_init ();
 335
 336   fcache *to_evict = &fcache_tab[0];
 337   unsigned huc = to_evict->use_count;
 338   for (unsigned i = 1; i < fcache_tab_size; ++i)
 339     {
 340       fcache *c = &fcache_tab[i];
 341       bool c_is_empty = (c->file_path == NULL);
 342
 343       if (c->use_count < to_evict->use_count
 344           || (to_evict->file_path && c_is_empty))
 345         /* We evict C because it's either an entry with a lower use
 346            count or one that is empty.  */
 347         to_evict = c;
 348
 349       if (huc < c->use_count)
 350         huc = c->use_count;
 351
 352       if (c_is_empty)
 353         /* We've reached the end of the cache; subsequent elements are
 354            all empty.  */
 355         break;
 356     }
 357
 358   if (highest_use_count)
 359     *highest_use_count = huc;
 360
 361   return to_evict;
 362 }
 363
 364 /* Create the cache used for the content of a given file to be
 365    accessed by caret diagnostic.  This cache is added to an array of
 366    cache and can be retrieved by lookup_file_in_cache_tab.  This
 367    function returns the created cache.  Note that only the last
 368    fcache_tab_size files are cached.  */
 369
 370 static fcache*
 371 add_file_to_cache_tab (const char *file_path)
 372 {
 373
 374   FILE *fp = fopen (file_path, "r");
 375   if (fp == NULL)
 376     return NULL;
 377
 378   unsigned highest_use_count = 0;
 379   fcache *r = evicted_cache_tab_entry (&highest_use_count);
 380   r->file_path = file_path;
 381   if (r->fp)
 382     fclose (r->fp);
 383   r->fp = fp;
 384   r->nb_read = 0;
 385   r->line_start_idx = 0;
 386   r->line_num = 0;
 387   r->line_record.truncate (0);
 388   /* Ensure that this cache entry doesn't get evicted next time
 389      add_file_to_cache_tab is called.  */
 390   r->use_count = ++highest_use_count;
 391   r->total_lines = total_lines_num (file_path);
 392   r->missing_trailing_newline = true;
 393
 394   return r;
 395 }
 396
 397 /* Lookup the cache used for the content of a given file accessed by
 398    caret diagnostic.  If no cached file was found, create a new cache
 399    for this file, add it to the array of cached file and return
 400    it.  */
 401
 402 static fcache*
 403 lookup_or_add_file_to_cache_tab (const char *file_path)
 404 {
 405   fcache *r = lookup_file_in_cache_tab (file_path);
 406   if (r == NULL)
 407     r = add_file_to_cache_tab (file_path);
 408   return r;
 409 }
 410
 411 /* Default constructor for a cache of file used by caret
 412    diagnostic.  */
 413
 414 fcache::fcache ()
 415 : use_count (0), file_path (NULL), fp (NULL), data (0),
 416   size (0), nb_read (0), line_start_idx (0), line_num (0),
 417   total_lines (0), missing_trailing_newline (true)
 418 {
 419   line_record.create (0);
 420 }
 421
 422 /* Destructor for a cache of file used by caret diagnostic.  */
 423
 424 fcache::~fcache ()
 425 {
 426   if (fp)
 427     {
 428       fclose (fp);
 429       fp = NULL;
 430     }
 431   if (data)
 432     {
 433       XDELETEVEC (data);
 434       data = 0;
 435     }
 436   line_record.release ();
 437 }
 438
 439 /* Returns TRUE iff the cache would need to be filled with data coming
 440    from the file.  That is, either the cache is empty or full or the
 441    current line is empty.  Note that if the cache is full, it would
 442    need to be extended and filled again.  */
 443
 444 static bool
 445 needs_read (fcache *c)
 446 {
 447   return (c->nb_read == 0
 448           || c->nb_read == c->size
 449           || (c->line_start_idx >= c->nb_read - 1));
 450 }
 451
 452 /*  Return TRUE iff the cache is full and thus needs to be
 453     extended.  */
 454
 455 static bool
 456 needs_grow (fcache *c)
 457 {
 458   return c->nb_read == c->size;
 459 }
 460
 461 /* Grow the cache if it needs to be extended.  */
 462
 463 static void
 464 maybe_grow (fcache *c)
 465 {
 466   if (!needs_grow (c))
 467     return;
 468
 469   size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
 470   c->data = XRESIZEVEC (char, c->data, size);
 471   c->size = size;
 472 }
 473
 474 /*  Read more data into the cache.  Extends the cache if need be.
 475     Returns TRUE iff new data could be read.  */
 476
 477 static bool
 478 read_data (fcache *c)
 479 {
 480   if (feof (c->fp) || ferror (c->fp))
 481     return false;
 482
 483   maybe_grow (c);
 484
 485   char * from = c->data + c->nb_read;
 486   size_t to_read = c->size - c->nb_read;
 487   size_t nb_read = fread (from, 1, to_read, c->fp);
 488
 489   if (ferror (c->fp))
 490     return false;
 491
 492   c->nb_read += nb_read;
 493   return !!nb_read;
 494 }
 495
 496 /* Read new data iff the cache needs to be filled with more data
 497    coming from the file FP.  Return TRUE iff the cache was filled with
 498    mode data.  */
 499
 500 static bool
 501 maybe_read_data (fcache *c)
 502 {
 503   if (!needs_read (c))
 504     return false;
 505   return read_data (c);
 506 }
 507
 508 /* Read a new line from file FP, using C as a cache for the data
 509    coming from the file.  Upon successful completion, *LINE is set to
 510    the beginning of the line found.  *LINE points directly in the
 511    line cache and is only valid until the next call of get_next_line.
 512    *LINE_LEN is set to the length of the line.  Note that the line
 513    does not contain any terminal delimiter.  This function returns
 514    true if some data was read or process from the cache, false
 515    otherwise.  Note that subsequent calls to get_next_line might
 516    make the content of *LINE invalid.  */
 517
 518 static bool
 519 get_next_line (fcache *c, char **line, ssize_t *line_len)
 520 {
 521   /* Fill the cache with data to process.  */
 522   maybe_read_data (c);
 523
 524   size_t remaining_size = c->nb_read - c->line_start_idx;
 525   if (remaining_size == 0)
 526     /* There is no more data to process.  */
 527     return false;
 528
 529   char *line_start = c->data + c->line_start_idx;
 530
 531   char *next_line_start = NULL;
 532   size_t len = 0;
 533   char *line_end = (char *) memchr (line_start, '\n', remaining_size);
 534   if (line_end == NULL)
 535     {
 536       /* We haven't found the end-of-line delimiter in the cache.
 537          Fill the cache with more data from the file and look for the
 538          '\n'.  */
 539       while (maybe_read_data (c))
 540         {
 541           line_start = c->data + c->line_start_idx;
 542           remaining_size = c->nb_read - c->line_start_idx;
 543           line_end = (char *) memchr (line_start, '\n', remaining_size);
 544           if (line_end != NULL)
 545             {
 546               next_line_start = line_end + 1;
 547               break;
 548             }
 549         }
 550       if (line_end == NULL)
 551         {
 552           /* We've loadded all the file into the cache and still no
 553              '\n'.  Let's say the line ends up at one byte passed the
 554              end of the file.  This is to stay consistent with the case
 555              of when the line ends up with a '\n' and line_end points to
 556              that terminal '\n'.  That consistency is useful below in
 557              the len calculation.  */
 558           line_end = c->data + c->nb_read ;
 559           c->missing_trailing_newline = true;
 560         }
 561       else
 562         c->missing_trailing_newline = false;
 563     }
 564   else
 565     {
 566       next_line_start = line_end + 1;
 567       c->missing_trailing_newline = false;
 568     }
 569
 570   if (ferror (c->fp))
 571     return false;
 572
 573   /* At this point, we've found the end of the of line.  It either
 574      points to the '\n' or to one byte after the last byte of the
 575      file.  */
 576   gcc_assert (line_end != NULL);
 577
 578   len = line_end - line_start;
 579
 580   if (c->line_start_idx < c->nb_read)
 581     *line = line_start;
 582
 583   ++c->line_num;
 584
 585   /* Before we update our line record, make sure the hint about the
 586      total number of lines of the file is correct.  If it's not, then
 587      we give up recording line boundaries from now on.  */
 588   bool update_line_record = true;
 589   if (c->line_num > c->total_lines)
 590     update_line_record = false;
 591
 592     /* Now update our line record so that re-reading lines from the
 593      before c->line_start_idx is faster.  */
 594   if (update_line_record
 595       && c->line_record.length () < fcache_line_record_size)
 596     {
 597       /* If the file lines fits in the line record, we just record all
 598          its lines ...*/
 599       if (c->total_lines <= fcache_line_record_size
 600           && c->line_num > c->line_record.length ())
 601         c->line_record.safe_push (fcache::line_info (c->line_num,
 602                                                  c->line_start_idx,
 603                                                  line_end - c->data));
 604       else if (c->total_lines > fcache_line_record_size)
 605         {
 606           /* ... otherwise, we just scale total_lines down to
 607              (fcache_line_record_size lines.  */
 608           size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
 609           if (c->line_record.length () == 0
 610               || n >= c->line_record.length ())
 611             c->line_record.safe_push (fcache::line_info (c->line_num,
 612                                                      c->line_start_idx,
 613                                                      line_end - c->data));
 614         }
 615     }
 616
 617   /* Update c->line_start_idx so that it points to the next line to be
 618      read.  */
 619   if (next_line_start)
 620     c->line_start_idx = next_line_start - c->data;
 621   else
 622     /* We didn't find any terminal '\n'.  Let's consider that the end
 623        of line is the end of the data in the cache.  The next
 624        invocation of get_next_line will either read more data from the
 625        underlying file or return false early because we've reached the
 626        end of the file.  */
 627     c->line_start_idx = c->nb_read;
 628
 629   *line_len = len;
 630
 631   return true;
 632 }
 633
 634 /* Consume the next bytes coming from the cache (or from its
 635    underlying file if there are remaining unread bytes in the file)
 636    until we reach the next end-of-line (or end-of-file).  There is no
 637    copying from the cache involved.  Return TRUE upon successful
 638    completion.  */
 639
 640 static bool
 641 goto_next_line (fcache *cache)
 642 {
 643   char *l;
 644   ssize_t len;
 645
 646   return get_next_line (cache, &l, &len);
 647 }
 648
 649 /* Read an arbitrary line number LINE_NUM from the file cached in C.
 650    If the line was read successfully, *LINE points to the beginning
 651    of the line in the file cache and *LINE_LEN is the length of the
 652    line.  *LINE is not nul-terminated, but may contain zero bytes.
 653    *LINE is only valid until the next call of read_line_num.
 654    This function returns bool if a line was read.  */
 655
 656 static bool
 657 read_line_num (fcache *c, size_t line_num,
 658                char **line, ssize_t *line_len)
 659 {
 660   gcc_assert (line_num > 0);
 661
 662   if (line_num <= c->line_num)
 663     {
 664       /* We've been asked to read lines that are before c->line_num.
 665          So lets use our line record (if it's not empty) to try to
 666          avoid re-reading the file from the beginning again.  */
 667
 668       if (c->line_record.is_empty ())
 669         {
 670           c->line_start_idx = 0;
 671           c->line_num = 0;
 672         }
 673       else
 674         {
 675           fcache::line_info *i = NULL;
 676           if (c->total_lines <= fcache_line_record_size)
 677             {
 678               /* In languages where the input file is not totally
 679                  preprocessed up front, the c->total_lines hint
 680                  can be smaller than the number of lines of the
 681                  file.  In that case, only the first
 682                  c->total_lines have been recorded.
 683
 684                  Otherwise, the first c->total_lines we've read have
 685                  their start/end recorded here.  */
 686               i = (line_num <= c->total_lines)
 687                 ? &c->line_record[line_num - 1]
 688                 : &c->line_record[c->total_lines - 1];
 689               gcc_assert (i->line_num <= line_num);
 690             }
 691           else
 692             {
 693               /*  So the file had more lines than our line record
 694                   size.  Thus the number of lines we've recorded has
 695                   been scaled down to fcache_line_reacord_size.  Let's
 696                   pick the start/end of the recorded line that is
 697                   closest to line_num.  */
 698               size_t n = (line_num <= c->total_lines)
 699                 ? line_num * fcache_line_record_size / c->total_lines
 700                 : c ->line_record.length () - 1;
 701               if (n < c->line_record.length ())
 702                 {
 703                   i = &c->line_record[n];
 704                   gcc_assert (i->line_num <= line_num);
 705                 }
 706             }
 707
 708           if (i && i->line_num == line_num)
 709             {
 710               /* We have the start/end of the line.  */
 711               *line = c->data + i->start_pos;
 712               *line_len = i->end_pos - i->start_pos;
 713               return true;
 714             }
 715
 716           if (i)
 717             {
 718               c->line_start_idx = i->start_pos;
 719               c->line_num = i->line_num - 1;
 720             }
 721           else
 722             {
 723               c->line_start_idx = 0;
 724               c->line_num = 0;
 725             }
 726         }
 727     }
 728
 729   /*  Let's walk from line c->line_num up to line_num - 1, without
 730       copying any line.  */
 731   while (c->line_num < line_num - 1)
 732     if (!goto_next_line (c))
 733       return false;
 734
 735   /* The line we want is the next one.  Let's read and copy it back to
 736      the caller.  */
 737   return get_next_line (c, line, line_len);
 738 }
 739
 740 /* Return the physical source line that corresponds to FILE_PATH/LINE.
 741    The line is not nul-terminated.  The returned pointer is only
 742    valid until the next call of location_get_source_line.
 743    Note that the line can contain several null characters,
 744    so the returned value's length has the actual length of the line.
 745    If the function fails, a NULL char_span is returned.  */
 746
 747 char_span
 748 location_get_source_line (const char *file_path, int line)
 749 {
 750   char *buffer = NULL;
 751   ssize_t len;
 752
 753   if (line == 0)
 754     return char_span (NULL, 0);
 755
 756   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
 757   if (c == NULL)
 758     return char_span (NULL, 0);
 759
 760   bool read = read_line_num (c, line, &buffer, &len);
 761   if (!read)
 762     return char_span (NULL, 0);
 763
 764   return char_span (buffer, len);
 765 }
 766
 767 /* Determine if FILE_PATH missing a trailing newline on its final line.
 768    Only valid to call once all of the file has been loaded, by
 769    requesting a line number beyond the end of the file.  */
 770
 771 bool
 772 location_missing_trailing_newline (const char *file_path)
 773 {
 774   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
 775   if (c == NULL)
 776     return false;
 777
 778   return c->missing_trailing_newline;
 779 }
 780
 781 /* Test if the location originates from the spelling location of a
 782    builtin-tokens.  That is, return TRUE if LOC is a (possibly
 783    virtual) location of a built-in token that appears in the expansion
 784    list of a macro.  Please note that this function also works on
 785    tokens that result from built-in tokens.  For instance, the
 786    function would return true if passed a token "4" that is the result
 787    of the expansion of the built-in __LINE__ macro.  */
 788 bool
 789 is_location_from_builtin_token (source_location loc)
 790 {
 791   const line_map_ordinary *map = NULL;
 792   loc = linemap_resolve_location (line_table, loc,
 793                                   LRK_SPELLING_LOCATION, &map);
 794   return loc == BUILTINS_LOCATION;
 795 }
 796
 797 /* Expand the source location LOC into a human readable location.  If
 798    LOC is virtual, it resolves to the expansion point of the involved
 799    macro.  If LOC resolves to a builtin location, the file name of the
 800    readable location is set to the string "<built-in>".  */
 801
 802 expanded_location
 803 expand_location (source_location loc)
 804 {
 805   return expand_location_1 (loc, /*expansion_point_p=*/true,
 806                             LOCATION_ASPECT_CARET);
 807 }
 808
 809 /* Expand the source location LOC into a human readable location.  If
 810    LOC is virtual, it resolves to the expansion location of the
 811    relevant macro.  If LOC resolves to a builtin location, the file
 812    name of the readable location is set to the string
 813    "<built-in>".  */
 814
 815 expanded_location
 816 expand_location_to_spelling_point (source_location loc,
 817                                    enum location_aspect aspect)
 818 {
 819   return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
 820 }
 821
 822 /* The rich_location class within libcpp requires a way to expand
 823    source_location instances, and relies on the client code
 824    providing a symbol named
 825      linemap_client_expand_location_to_spelling_point
 826    to do this.
 827
 828    This is the implementation for libcommon.a (all host binaries),
 829    which simply calls into expand_location_1.  */
 830
 831 expanded_location
 832 linemap_client_expand_location_to_spelling_point (source_location loc,
 833                                                   enum location_aspect aspect)
 834 {
 835   return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
 836 }
 837
 838
 839 /* If LOCATION is in a system header and if it is a virtual location for
 840    a token coming from the expansion of a macro, unwind it to the
 841    location of the expansion point of the macro.  Otherwise, just return
 842    LOCATION.
 843
 844    This is used for instance when we want to emit diagnostics about a
 845    token that may be located in a macro that is itself defined in a
 846    system header, for example, for the NULL macro.  In such a case, if
 847    LOCATION were passed directly to diagnostic functions such as
 848    warning_at, the diagnostic would be suppressed (unless
 849    -Wsystem-headers).  */
 850
 851 source_location
 852 expansion_point_location_if_in_system_header (source_location location)
 853 {
 854   if (in_system_header_at (location))
 855     location = linemap_resolve_location (line_table, location,
 856                                          LRK_MACRO_EXPANSION_POINT,
 857                                          NULL);
 858   return location;
 859 }
 860
 861 /* If LOCATION is a virtual location for a token coming from the expansion
 862    of a macro, unwind to the location of the expansion point of the macro.  */
 863
 864 source_location
 865 expansion_point_location (source_location location)
 866 {
 867   return linemap_resolve_location (line_table, location,
 868                                    LRK_MACRO_EXPANSION_POINT, NULL);
 869 }
 870
 871 /* Construct a location with caret at CARET, ranging from START to
 872    finish e.g.
 873
 874                  11111111112
 875         12345678901234567890
 876      522
 877      523   return foo + bar;
 878                   ~~~~^~~~~
 879      524
 880
 881    The location's caret is at the "+", line 523 column 15, but starts
 882    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
 883    of "bar" at column 19.  */
 884
 885 location_t
 886 make_location (location_t caret, location_t start, location_t finish)
 887 {
 888   location_t pure_loc = get_pure_location (caret);
 889   source_range src_range;
 890   src_range.m_start = get_start (start);
 891   src_range.m_finish = get_finish (finish);
 892   location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
 893                                                    pure_loc,
 894                                                    src_range,
 895                                                    NULL);
 896   return combined_loc;
 897 }
 898
 899 /* Same as above, but taking a source range rather than two locations.  */
 900
 901 location_t
 902 make_location (location_t caret, source_range src_range)
 903 {
 904   location_t pure_loc = get_pure_location (caret);
 905   return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
 906 }
 907
 908 #define ONE_K 1024
 909 #define ONE_M (ONE_K * ONE_K)
 910
 911 /* Display a number as an integer multiple of either:
 912    - 1024, if said integer is >= to 10 K (in base 2)
 913    - 1024 * 1024, if said integer is >= 10 M in (base 2)
 914  */
 915 #define SCALE(x) ((unsigned long) ((x) < 10 * ONE_K \
 916                   ? (x) \
 917                   : ((x) < 10 * ONE_M \
 918                      ? (x) / ONE_K \
 919                      : (x) / ONE_M)))
 920
 921 /* For a given integer, display either:
 922    - the character 'k', if the number is higher than 10 K (in base 2)
 923      but strictly lower than 10 M (in base 2)
 924    - the character 'M' if the number is higher than 10 M (in base2)
 925    - the charcter ' ' if the number is strictly lower  than 10 K  */
 926 #define STAT_LABEL(x) ((x) < 10 * ONE_K ? ' ' : ((x) < 10 * ONE_M ? 'k' : 'M'))
 927
 928 /* Display an integer amount as multiple of 1K or 1M (in base 2).
 929    Display the correct unit (either k, M, or ' ') after the amount, as
 930    well.  */
 931 #define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
 932
 933 /* Dump statistics to stderr about the memory usage of the line_table
 934    set of line maps.  This also displays some statistics about macro
 935    expansion.  */
 936
 937 void
 938 dump_line_table_statistics (void)
 939 {
 940   struct linemap_stats s;
 941   long total_used_map_size,
 942     macro_maps_size,
 943     total_allocated_map_size;
 944
 945   memset (&s, 0, sizeof (s));
 946
 947   linemap_get_statistics (line_table, &s);
 948
 949   macro_maps_size = s.macro_maps_used_size
 950     + s.macro_maps_locations_size;
 951
 952   total_allocated_map_size = s.ordinary_maps_allocated_size
 953     + s.macro_maps_allocated_size
 954     + s.macro_maps_locations_size;
 955
 956   total_used_map_size = s.ordinary_maps_used_size
 957     + s.macro_maps_used_size
 958     + s.macro_maps_locations_size;
 959
 960   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
 961            s.num_expanded_macros);
 962   if (s.num_expanded_macros != 0)
 963     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
 964              s.num_macro_tokens / s.num_expanded_macros);
 965   fprintf (stderr,
 966            "\nLine Table allocations during the "
 967            "compilation process\n");
 968   fprintf (stderr, "Number of ordinary maps used:        %5ld%c\n",
 969            SCALE (s.num_ordinary_maps_used),
 970            STAT_LABEL (s.num_ordinary_maps_used));
 971   fprintf (stderr, "Ordinary map used size:              %5ld%c\n",
 972            SCALE (s.ordinary_maps_used_size),
 973            STAT_LABEL (s.ordinary_maps_used_size));
 974   fprintf (stderr, "Number of ordinary maps allocated:   %5ld%c\n",
 975            SCALE (s.num_ordinary_maps_allocated),
 976            STAT_LABEL (s.num_ordinary_maps_allocated));
 977   fprintf (stderr, "Ordinary maps allocated size:        %5ld%c\n",
 978            SCALE (s.ordinary_maps_allocated_size),
 979            STAT_LABEL (s.ordinary_maps_allocated_size));
 980   fprintf (stderr, "Number of macro maps used:           %5ld%c\n",
 981            SCALE (s.num_macro_maps_used),
 982            STAT_LABEL (s.num_macro_maps_used));
 983   fprintf (stderr, "Macro maps used size:                %5ld%c\n",
 984            SCALE (s.macro_maps_used_size),
 985            STAT_LABEL (s.macro_maps_used_size));
 986   fprintf (stderr, "Macro maps locations size:           %5ld%c\n",
 987            SCALE (s.macro_maps_locations_size),
 988            STAT_LABEL (s.macro_maps_locations_size));
 989   fprintf (stderr, "Macro maps size:                     %5ld%c\n",
 990            SCALE (macro_maps_size),
 991            STAT_LABEL (macro_maps_size));
 992   fprintf (stderr, "Duplicated maps locations size:      %5ld%c\n",
 993            SCALE (s.duplicated_macro_maps_locations_size),
 994            STAT_LABEL (s.duplicated_macro_maps_locations_size));
 995   fprintf (stderr, "Total allocated maps size:           %5ld%c\n",
 996            SCALE (total_allocated_map_size),
 997            STAT_LABEL (total_allocated_map_size));
 998   fprintf (stderr, "Total used maps size:                %5ld%c\n",
 999            SCALE (total_used_map_size),
1000            STAT_LABEL (total_used_map_size));
1001   fprintf (stderr, "Ad-hoc table size:                   %5ld%c\n",
1002            SCALE (s.adhoc_table_size),
1003            STAT_LABEL (s.adhoc_table_size));
1004   fprintf (stderr, "Ad-hoc table entries used:           %5ld\n",
1005            s.adhoc_table_entries_used);
1006   fprintf (stderr, "optimized_ranges: %i\n",
1007            line_table->num_optimized_ranges);
1008   fprintf (stderr, "unoptimized_ranges: %i\n",
1009            line_table->num_unoptimized_ranges);
1010
1011   fprintf (stderr, "\n");
1012 }
1013
1014 /* Get location one beyond the final location in ordinary map IDX.  */
1015
1016 static source_location
1017 get_end_location (struct line_maps *set, unsigned int idx)
1018 {
1019   if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1020     return set->highest_location;
1021
1022   struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1023   return MAP_START_LOCATION (next_map);
1024 }
1025
/* Helper function for write_digit_row.
   Write to STREAM the character for DIGIT modulo 10, i.e. the units
   digit of a non-negative DIGIT.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1033
1034 /* Helper function for dump_location_info.
1035    Write a row of numbers to STREAM, numbering a source line,
1036    giving the units, tens, hundreds etc of the column number.  */
1037
1038 static void
1039 write_digit_row (FILE *stream, int indent,
1040                  const line_map_ordinary *map,
1041                  source_location loc, int max_col, int divisor)
1042 {
1043   fprintf (stream, "%*c", indent, ' ');
1044   fprintf (stream, "|");
1045   for (int column = 1; column < max_col; column++)
1046     {
1047       source_location column_loc = loc + (column << map->m_range_bits);
1048       write_digit (stream, column_loc / divisor);
1049     }
1050   fprintf (stream, "\n");
1051 }
1052
1053 /* Write a half-closed (START) / half-open (END) interval of
1054    source_location to STREAM.  */
1055
1056 static void
1057 dump_location_range (FILE *stream,
1058                      source_location start, source_location end)
1059 {
1060   fprintf (stream,
1061            "  source_location interval: %u <= loc < %u\n",
1062            start, end);
1063 }
1064
1065 /* Write a labelled description of a half-closed (START) / half-open (END)
1066    interval of source_location to STREAM.  */
1067
1068 static void
1069 dump_labelled_location_range (FILE *stream,
1070                               const char *name,
1071                               source_location start, source_location end)
1072 {
1073   fprintf (stream, "%s\n", name);
1074   dump_location_range (stream, start, end);
1075   fprintf (stream, "\n");
1076 }
1077
1078 /* Write a visualization of the locations in the line_table to STREAM.  */
1079
1080 void
1081 dump_location_info (FILE *stream)
1082 {
1083   /* Visualize the reserved locations.  */
1084   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1085                                 0, RESERVED_LOCATION_COUNT);
1086
1087   /* Visualize the ordinary line_map instances, rendering the sources. */
1088   for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1089     {
1090       source_location end_location = get_end_location (line_table, idx);
1091       /* half-closed: doesn't include this one. */
1092
1093       const line_map_ordinary *map
1094         = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1095       fprintf (stream, "ORDINARY MAP: %i\n", idx);
1096       dump_location_range (stream,
1097                            MAP_START_LOCATION (map), end_location);
1098       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1099       fprintf (stream, "  starting at line: %i\n",
1100                ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1101       fprintf (stream, "  column and range bits: %i\n",
1102                map->m_column_and_range_bits);
1103       fprintf (stream, "  column bits: %i\n",
1104                map->m_column_and_range_bits - map->m_range_bits);
1105       fprintf (stream, "  range bits: %i\n",
1106                map->m_range_bits);
1107
1108       /* Render the span of source lines that this "map" covers.  */
1109       for (source_location loc = MAP_START_LOCATION (map);
1110            loc < end_location;
1111            loc += (1 << map->m_range_bits) )
1112         {
1113           gcc_assert (pure_location_p (line_table, loc) );
1114
1115           expanded_location exploc
1116             = linemap_expand_location (line_table, map, loc);
1117
1118           if (exploc.column == 0)
1119             {
1120               /* Beginning of a new source line: draw the line.  */
1121
1122               char_span line_text = location_get_source_line (exploc.file,
1123                                                               exploc.line);
1124               if (!line_text)
1125                 break;
1126               fprintf (stream,
1127                        "%s:%3i|loc:%5i|%.*s\n",
1128                        exploc.file, exploc.line,
1129                        loc,
1130                        (int)line_text.length (), line_text.get_buffer ());
1131
1132               /* "loc" is at column 0, which means "the whole line".
1133                  Render the locations *within* the line, by underlining
1134                  it, showing the source_location numeric values
1135                  at each column.  */
1136               size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1137               if (max_col > line_text.length ())
1138                 max_col = line_text.length () + 1;
1139
1140               int indent = 14 + strlen (exploc.file);
1141
1142               /* Thousands.  */
1143               if (end_location > 999)
1144                 write_digit_row (stream, indent, map, loc, max_col, 1000);
1145
1146               /* Hundreds.  */
1147               if (end_location > 99)
1148                 write_digit_row (stream, indent, map, loc, max_col, 100);
1149
1150               /* Tens.  */
1151               write_digit_row (stream, indent, map, loc, max_col, 10);
1152
1153               /* Units.  */
1154               write_digit_row (stream, indent, map, loc, max_col, 1);
1155             }
1156         }
1157       fprintf (stream, "\n");
1158     }
1159
1160   /* Visualize unallocated values.  */
1161   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1162                                 line_table->highest_location,
1163                                 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1164
1165   /* Visualize the macro line_map instances, rendering the sources. */
1166   for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1167     {
1168       /* Each macro map that is allocated owns source_location values
1169          that are *lower* that the one before them.
1170          Hence it's meaningful to view them either in order of ascending
1171          source locations, or in order of ascending macro map index.  */
1172       const bool ascending_source_locations = true;
1173       unsigned int idx = (ascending_source_locations
1174                           ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1175                           : i);
1176       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1177       fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1178                idx,
1179                linemap_map_get_macro_name (map),
1180                MACRO_MAP_NUM_MACRO_TOKENS (map));
1181       dump_location_range (stream,
1182                            map->start_location,
1183                            (map->start_location
1184                             + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1185       inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1186               "expansion point is location %i",
1187               MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1188       fprintf (stream, "  map->start_location: %u\n",
1189                map->start_location);
1190
1191       fprintf (stream, "  macro_locations:\n");
1192       for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1193         {
1194           source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1195           source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1196
1197           /* linemap_add_macro_token encodes token numbers in an expansion
1198              by putting them after MAP_START_LOCATION. */
1199
1200           /* I'm typically seeing 4 uninitialized entries at the end of
1201              0xafafafaf.
1202              This appears to be due to macro.c:replace_args
1203              adding 2 extra args for padding tokens; presumably there may
1204              be a leading and/or trailing padding token injected,
1205              each for 2 more location slots.
1206              This would explain there being up to 4 source_locations slots
1207              that may be uninitialized.  */
1208
1209           fprintf (stream, "    %u: %u, %u\n",
1210                    i,
1211                    x,
1212                    y);
1213           if (x == y)
1214             {
1215               if (x < MAP_START_LOCATION (map))
1216                 inform (x, "token %u has x-location == y-location == %u", i, x);
1217               else
1218                 fprintf (stream,
1219                          "x-location == y-location == %u encodes token # %u\n",
1220                          x, x - MAP_START_LOCATION (map));
1221                 }
1222           else
1223             {
1224               inform (x, "token %u has x-location == %u", i, x);
1225               inform (x, "token %u has y-location == %u", i, y);
1226             }
1227         }
1228       fprintf (stream, "\n");
1229     }
1230
1231   /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1232      macro map, presumably due to an off-by-one error somewhere
1233      between the logic in linemap_enter_macro and
1234      LINEMAPS_MACRO_LOWEST_LOCATION.  */
1235   dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1236                                 MAX_SOURCE_LOCATION,
1237                                 MAX_SOURCE_LOCATION + 1);
1238
1239   /* Visualize ad-hoc values.  */
1240   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1241                                 MAX_SOURCE_LOCATION + 1, UINT_MAX);
1242 }
1243
1244 /* string_concat's constructor.  */
1245
1246 string_concat::string_concat (int num, location_t *locs)
1247   : m_num (num)
1248 {
1249   m_locs = ggc_vec_alloc <location_t> (num);
1250   for (int i = 0; i < num; i++)
1251     m_locs[i] = locs[i];
1252 }
1253
1254 /* string_concat_db's constructor.  */
1255
1256 string_concat_db::string_concat_db ()
1257 {
1258   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1259 }
1260
1261 /* Record that a string concatenation occurred, covering NUM
1262    string literal tokens.  LOCS is an array of size NUM, containing the
1263    locations of the tokens.  A copy of LOCS is taken.  */
1264
1265 void
1266 string_concat_db::record_string_concatenation (int num, location_t *locs)
1267 {
1268   gcc_assert (num > 1);
1269   gcc_assert (locs);
1270
1271   location_t key_loc = get_key_loc (locs[0]);
1272
1273   string_concat *concat
1274     = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1275   m_table->put (key_loc, concat);
1276 }
1277
1278 /* Determine if LOC was the location of the the initial token of a
1279    concatenation of string literal tokens.
1280    If so, *OUT_NUM is written to with the number of tokens, and
1281    *OUT_LOCS with the location of an array of locations of the
1282    tokens, and return true.  *OUT_LOCS is a borrowed pointer to
1283    storage owned by the string_concat_db.
1284    Otherwise, return false.  */
1285
1286 bool
1287 string_concat_db::get_string_concatenation (location_t loc,
1288                                             int *out_num,
1289                                             location_t **out_locs)
1290 {
1291   gcc_assert (out_num);
1292   gcc_assert (out_locs);
1293
1294   location_t key_loc = get_key_loc (loc);
1295
1296   string_concat **concat = m_table->get (key_loc);
1297   if (!concat)
1298     return false;
1299
1300   *out_num = (*concat)->m_num;
1301   *out_locs =(*concat)->m_locs;
1302   return true;
1303 }
1304
1305 /* Internal function.  Canonicalize LOC into a form suitable for
1306    use as a key within the database, stripping away macro expansion,
1307    ad-hoc information, and range information, using the location of
1308    the start of LOC within an ordinary linemap.  */
1309
1310 location_t
1311 string_concat_db::get_key_loc (location_t loc)
1312 {
1313   loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1314                                   NULL);
1315
1316   loc = get_range_from_loc (line_table, loc).m_start;
1317
1318   return loc;
1319 }
1320
1321 /* Helper class for use within get_substring_ranges_for_loc.
1322    An vec of cpp_string with responsibility for releasing all of the
1323    str->text for each str in the vector.  */
1324
1325 class auto_cpp_string_vec :  public auto_vec <cpp_string>
1326 {
1327  public:
1328   auto_cpp_string_vec (int alloc)
1329     : auto_vec <cpp_string> (alloc) {}
1330
1331   ~auto_cpp_string_vec ()
1332   {
1333     /* Clean up the copies within this vec.  */
1334     int i;
1335     cpp_string *str;
1336     FOR_EACH_VEC_ELT (*this, i, str)
1337       free (const_cast <unsigned char *> (str->text));
1338   }
1339 };
1340
1341 /* Attempt to populate RANGES with source location information on the
1342    individual characters within the string literal found at STRLOC.
1343    If CONCATS is non-NULL, then any string literals that the token at
1344    STRLOC  was concatenated with are also added to RANGES.
1345
1346    Return NULL if successful, or an error message if any errors occurred (in
1347    which case RANGES may be only partially populated and should not
1348    be used).
1349
1350    This is implemented by re-parsing the relevant source line(s).  */
1351
1352 static const char *
1353 get_substring_ranges_for_loc (cpp_reader *pfile,
1354                               string_concat_db *concats,
1355                               location_t strloc,
1356                               enum cpp_ttype type,
1357                               cpp_substring_ranges &ranges)
1358 {
1359   gcc_assert (pfile);
1360
1361   if (strloc == UNKNOWN_LOCATION)
1362     return "unknown location";
1363
1364   /* Reparsing the strings requires accurate location information.
1365      If -ftrack-macro-expansion has been overridden from its default
1366      of 2, then we might have a location of a macro expansion point,
1367      rather than the location of the literal itself.
1368      Avoid this by requiring that we have full macro expansion tracking
1369      for substring locations to be available.  */
1370   if (cpp_get_options (pfile)->track_macro_expansion != 2)
1371     return "track_macro_expansion != 2";
1372
1373   /* If #line or # 44 "file"-style directives are present, then there's
1374      no guarantee that the line numbers we have can be used to locate
1375      the strings.  For example, we might have a .i file with # directives
1376      pointing back to lines within a .c file, but the .c file might
1377      have been edited since the .i file was created.
1378      In such a case, the safest course is to disable on-demand substring
1379      locations.  */
1380   if (line_table->seen_line_directive)
1381     return "seen line directive";
1382
1383   /* If string concatenation has occurred at STRLOC, get the locations
1384      of all of the literal tokens making up the compound string.
1385      Otherwise, just use STRLOC.  */
1386   int num_locs = 1;
1387   location_t *strlocs = &strloc;
1388   if (concats)
1389     concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1390
1391   auto_cpp_string_vec strs (num_locs);
1392   auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1393   for (int i = 0; i < num_locs; i++)
1394     {
1395       /* Get range of strloc.  We will use it to locate the start and finish
1396          of the literal token within the line.  */
1397       source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1398
1399       if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1400         {
1401           /* If the string token was within a macro expansion, then we can
1402              cope with it for the simple case where we have a single token.
1403              Otherwise, bail out.  */
1404           if (src_range.m_start != src_range.m_finish)
1405             return "macro expansion";
1406         }
1407       else
1408         {
1409           if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1410             /* If so, we can't reliably determine where the token started within
1411                its line.  */
1412             return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1413
1414           if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1415             /* If so, we can't reliably determine where the token finished
1416                within its line.  */
1417             return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1418         }
1419
1420       expanded_location start
1421         = expand_location_to_spelling_point (src_range.m_start,
1422                                              LOCATION_ASPECT_START);
1423       expanded_location finish
1424         = expand_location_to_spelling_point (src_range.m_finish,
1425                                              LOCATION_ASPECT_FINISH);
1426       if (start.file != finish.file)
1427         return "range endpoints are in different files";
1428       if (start.line != finish.line)
1429         return "range endpoints are on different lines";
1430       if (start.column > finish.column)
1431         return "range endpoints are reversed";
1432
1433       char_span line = location_get_source_line (start.file, start.line);
1434       if (!line)
1435         return "unable to read source line";
1436
1437       /* Determine the location of the literal (including quotes
1438          and leading prefix chars, such as the 'u' in a u""
1439          token).  */
1440       size_t literal_length = finish.column - start.column + 1;
1441
1442       /* Ensure that we don't crash if we got the wrong location.  */
1443       if (line.length () < (start.column - 1 + literal_length))
1444         return "line is not wide enough";
1445
1446       char_span literal = line.subspan (start.column - 1, literal_length);
1447
1448       cpp_string from;
1449       from.len = literal_length;
1450       /* Make a copy of the literal, to avoid having to rely on
1451          the lifetime of the copy of the line within the cache.
1452          This will be released by the auto_cpp_string_vec dtor.  */
1453       from.text = (unsigned char *)literal.xstrdup ();
1454       strs.safe_push (from);
1455
1456       /* For very long lines, a new linemap could have started
1457          halfway through the token.
1458          Ensure that the loc_reader uses the linemap of the
1459          *end* of the token for its start location.  */
1460       const line_map_ordinary *start_ord_map;
1461       linemap_resolve_location (line_table, src_range.m_start,
1462                                 LRK_SPELLING_LOCATION, &start_ord_map);
1463       const line_map_ordinary *final_ord_map;
1464       linemap_resolve_location (line_table, src_range.m_finish,
1465                                 LRK_SPELLING_LOCATION, &final_ord_map);
1466       /* Bulletproofing.  We ought to only have different ordinary maps
1467          for start vs finish due to line-length jumps.  */
1468       if (start_ord_map != final_ord_map
1469           && start_ord_map->to_file != final_ord_map->to_file)
1470           return "start and finish are spelled in different ordinary maps";
1471       location_t start_loc
1472         = linemap_position_for_line_and_column (line_table, final_ord_map,
1473                                                 start.line, start.column);
1474
1475       cpp_string_location_reader loc_reader (start_loc, line_table);
1476       loc_readers.safe_push (loc_reader);
1477     }
1478
1479   /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
1480   const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1481                                                  loc_readers.address (),
1482                                                  num_locs, &ranges, type);
1483   if (err)
1484     return err;
1485
1486   /* Success: "ranges" should now contain information on the string.  */
1487   return NULL;
1488 }
1489
1490 /* Attempt to populate *OUT_LOC with source location information on the
1491    given characters within the string literal found at STRLOC.
1492    CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1493    character set.
1494
1495    For example, given CARET_IDX = 4, START_IDX = 3, END_IDX  = 7
1496    and string literal "012345\n789"
1497    *OUT_LOC is written to with:
1498      "012345\n789"
1499          ~^~~~~
1500
1501    If CONCATS is non-NULL, then any string literals that the token at
1502    STRLOC was concatenated with are also considered.
1503
1504    This is implemented by re-parsing the relevant source line(s).
1505
1506    Return NULL if successful, or an error message if any errors occurred.
1507    Error messages are intended for GCC developers (to help debugging) rather
1508    than for end-users.  */
1509
1510 const char *
1511 get_source_location_for_substring (cpp_reader *pfile,
1512                                    string_concat_db *concats,
1513                                    location_t strloc,
1514                                    enum cpp_ttype type,
1515                                    int caret_idx, int start_idx, int end_idx,
1516                                    source_location *out_loc)
1517 {
1518   gcc_checking_assert (caret_idx >= 0);
1519   gcc_checking_assert (start_idx >= 0);
1520   gcc_checking_assert (end_idx >= 0);
1521   gcc_assert (out_loc);
1522
1523   cpp_substring_ranges ranges;
1524   const char *err
1525     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1526   if (err)
1527     return err;
1528
1529   if (caret_idx >= ranges.get_num_ranges ())
1530     return "caret_idx out of range";
1531   if (start_idx >= ranges.get_num_ranges ())
1532     return "start_idx out of range";
1533   if (end_idx >= ranges.get_num_ranges ())
1534     return "end_idx out of range";
1535
1536   *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1537                             ranges.get_range (start_idx).m_start,
1538                             ranges.get_range (end_idx).m_finish);
1539   return NULL;
1540 }
1541
1542 #if CHECKING_P
1543
1544 namespace selftest {
1545
1546 /* Selftests of location handling.  */
1547
1548 /* Attempt to populate *OUT_RANGE with source location information on the
1549    given character within the string literal found at STRLOC.
1550    CHAR_IDX refers to an offset within the execution character set.
1551    If CONCATS is non-NULL, then any string literals that the token at
1552    STRLOC was concatenated with are also considered.
1553
1554    This is implemented by re-parsing the relevant source line(s).
1555
1556    Return NULL if successful, or an error message if any errors occurred.
1557    Error messages are intended for GCC developers (to help debugging) rather
1558    than for end-users.  */
1559
1560 static const char *
1561 get_source_range_for_char (cpp_reader *pfile,
1562                            string_concat_db *concats,
1563                            location_t strloc,
1564                            enum cpp_ttype type,
1565                            int char_idx,
1566                            source_range *out_range)
1567 {
1568   gcc_checking_assert (char_idx >= 0);
1569   gcc_assert (out_range);
1570
1571   cpp_substring_ranges ranges;
1572   const char *err
1573     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1574   if (err)
1575     return err;
1576
1577   if (char_idx >= ranges.get_num_ranges ())
1578     return "char_idx out of range";
1579
1580   *out_range = ranges.get_range (char_idx);
1581   return NULL;
1582 }
1583
1584 /* As get_source_range_for_char, but write to *OUT the number
1585    of ranges that are available.  */
1586
1587 static const char *
1588 get_num_source_ranges_for_substring (cpp_reader *pfile,
1589                                      string_concat_db *concats,
1590                                      location_t strloc,
1591                                      enum cpp_ttype type,
1592                                      int *out)
1593 {
1594   gcc_assert (out);
1595
1596   cpp_substring_ranges ranges;
1597   const char *err
1598     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1599
1600   if (err)
1601     return err;
1602
1603   *out = ranges.get_num_ranges ();
1604   return NULL;
1605 }
1606
1607 /* Selftests of location handling.  */
1608
1609 /* Verify that compare() on linenum_type handles comparisons over the full
1610    range of the type.  */
1611
1612 static void
1613 test_linenum_comparisons ()
1614 {
1615   linenum_type min_line (0);
1616   linenum_type max_line (0xffffffff);
1617   ASSERT_EQ (0, compare (min_line, min_line));
1618   ASSERT_EQ (0, compare (max_line, max_line));
1619
1620   ASSERT_GT (compare (max_line, min_line), 0);
1621   ASSERT_LT (compare (min_line, max_line), 0);
1622 }
1623
1624 /* Helper function for verifying location data: when location_t
1625    values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1626    as having column 0.  */
1627
1628 static bool
1629 should_have_column_data_p (location_t loc)
1630 {
1631   if (IS_ADHOC_LOC (loc))
1632     loc = get_location_from_adhoc_loc (line_table, loc);
1633   if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1634     return false;
1635   return true;
1636 }
1637
1638 /* Selftest for should_have_column_data_p.  */
1639
1640 static void
1641 test_should_have_column_data_p ()
1642 {
1643   ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1644   ASSERT_TRUE
1645     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1646   ASSERT_FALSE
1647     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1648 }
1649
1650 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1651    on LOC.  */
1652
1653 static void
1654 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1655               location_t loc)
1656 {
1657   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1658   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1659   /* If location_t values are sufficiently high, then column numbers
1660      will be unavailable and LOCATION_COLUMN (loc) will be 0.
1661      When close to the threshold, column numbers *may* be present: if
1662      the final linemap before the threshold contains a line that straddles
1663      the threshold, locations in that line have column information.  */
1664   if (should_have_column_data_p (loc))
1665     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1666 }
1667
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
   and others use zero
   - the fallback modes within line-map.c: there are various threshold
   values for source_location/location_t beyond which line-map.c changes
   behavior (disabling of the range-packing optimization, disabling
   of column-tracking).  We can exercise these by starting the line_table
   at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix.  */
1683
struct line_table_case
{
  /* Record DEFAULT_RANGE_BITS and BASE_LOCATION for later use when a
     line table is set up for this case.  */
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* The value to use for the line table's default_range_bits.  */
  int m_default_range_bits;

  /* The starting value for the line table's highest_location and
     highest_line; zero means "leave them as initialized".  */
  int m_base_location;
};
1694
1695 /* Constructor.  Store the old value of line_table, and create a new
1696    one, using sane defaults.  */
1697
1698 line_table_test::line_table_test ()
1699 {
1700   gcc_assert (saved_line_table == NULL);
1701   saved_line_table = line_table;
1702   line_table = ggc_alloc<line_maps> ();
1703   linemap_init (line_table, BUILTINS_LOCATION);
1704   gcc_assert (saved_line_table->reallocator);
1705   line_table->reallocator = saved_line_table->reallocator;
1706   gcc_assert (saved_line_table->round_alloc_size);
1707   line_table->round_alloc_size = saved_line_table->round_alloc_size;
1708   line_table->default_range_bits = 0;
1709 }
1710
/* Constructor.  Store the old value of line_table, and create a new
   one, using the situation described in CASE_.  */
1713
1714 line_table_test::line_table_test (const line_table_case &case_)
1715 {
1716   gcc_assert (saved_line_table == NULL);
1717   saved_line_table = line_table;
1718   line_table = ggc_alloc<line_maps> ();
1719   linemap_init (line_table, BUILTINS_LOCATION);
1720   gcc_assert (saved_line_table->reallocator);
1721   line_table->reallocator = saved_line_table->reallocator;
1722   gcc_assert (saved_line_table->round_alloc_size);
1723   line_table->round_alloc_size = saved_line_table->round_alloc_size;
1724   line_table->default_range_bits = case_.m_default_range_bits;
1725   if (case_.m_base_location)
1726     {
1727       line_table->highest_location = case_.m_base_location;
1728       line_table->highest_line = case_.m_base_location;
1729     }
1730 }
1731
1732 /* Destructor.  Restore the old value of line_table.  */
1733
1734 line_table_test::~line_table_test ()
1735 {
1736   gcc_assert (saved_line_table != NULL);
1737   line_table = saved_line_table;
1738   saved_line_table = NULL;
1739 }
1740
1741 /* Verify basic operation of ordinary linemaps.  */
1742
1743 static void
1744 test_accessing_ordinary_linemaps (const line_table_case &case_)
1745 {
1746   line_table_test ltt (case_);
1747
1748   /* Build a simple linemap describing some locations. */
1749   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1750
1751   linemap_line_start (line_table, 1, 100);
1752   location_t loc_a = linemap_position_for_column (line_table, 1);
1753   location_t loc_b = linemap_position_for_column (line_table, 23);
1754
1755   linemap_line_start (line_table, 2, 100);
1756   location_t loc_c = linemap_position_for_column (line_table, 1);
1757   location_t loc_d = linemap_position_for_column (line_table, 17);
1758
1759   /* Example of a very long line.  */
1760   linemap_line_start (line_table, 3, 2000);
1761   location_t loc_e = linemap_position_for_column (line_table, 700);
1762
1763   /* Transitioning back to a short line.  */
1764   linemap_line_start (line_table, 4, 0);
1765   location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1766
1767   if (should_have_column_data_p (loc_back_to_short))
1768     {
1769       /* Verify that we switched to short lines in the linemap.  */
1770       line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1771       ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1772     }
1773
1774   /* Example of a line that will eventually be seen to be longer
1775      than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1776      below that.  */
1777   linemap_line_start (line_table, 5, 2000);
1778
1779   location_t loc_start_of_very_long_line
1780     = linemap_position_for_column (line_table, 2000);
1781   location_t loc_too_wide
1782     = linemap_position_for_column (line_table, 4097);
1783   location_t loc_too_wide_2
1784     = linemap_position_for_column (line_table, 4098);
1785
1786   /* ...and back to a sane line length.  */
1787   linemap_line_start (line_table, 6, 100);
1788   location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1789
1790   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1791
1792   /* Multiple files.  */
1793   linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1794   linemap_line_start (line_table, 1, 200);
1795   location_t loc_f = linemap_position_for_column (line_table, 150);
1796   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1797
1798   /* Verify that we can recover the location info.  */
1799   assert_loceq ("foo.c", 1, 1, loc_a);
1800   assert_loceq ("foo.c", 1, 23, loc_b);
1801   assert_loceq ("foo.c", 2, 1, loc_c);
1802   assert_loceq ("foo.c", 2, 17, loc_d);
1803   assert_loceq ("foo.c", 3, 700, loc_e);
1804   assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1805
1806   /* In the very wide line, the initial location should be fully tracked.  */
1807   assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1808   /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1809      be disabled.  */
1810   assert_loceq ("foo.c", 5, 0, loc_too_wide);
1811   assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1812   /*...and column-tracking should be re-enabled for subsequent lines.  */
1813   assert_loceq ("foo.c", 6, 10, loc_sane_again);
1814
1815   assert_loceq ("bar.c", 1, 150, loc_f);
1816
1817   ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1818   ASSERT_TRUE (pure_location_p (line_table, loc_a));
1819
1820   /* Verify using make_location to build a range, and extracting data
1821      back from it.  */
1822   location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1823   ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1824   ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1825   source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1826   ASSERT_EQ (loc_b, src_range.m_start);
1827   ASSERT_EQ (loc_d, src_range.m_finish);
1828 }
1829
1830 /* Verify various properties of UNKNOWN_LOCATION.  */
1831
1832 static void
1833 test_unknown_location ()
1834 {
1835   ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1836   ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1837   ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1838 }
1839
1840 /* Verify various properties of BUILTINS_LOCATION.  */
1841
1842 static void
1843 test_builtins ()
1844 {
1845   assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1846   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1847 }
1848
1849 /* Regression test for make_location.
1850    Ensure that we use pure locations for the start/finish of the range,
1851    rather than storing a packed or ad-hoc range as the start/finish.  */
1852
1853 static void
1854 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1855 {
1856   /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1857      with C++ frontend.
1858      ....................0000000001111111111222.
1859      ....................1234567890123456789012.  */
1860   const char *content = "     r += !aaa == bbb;\n";
1861   temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1862   line_table_test ltt (case_);
1863   linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1864
1865   const location_t c11 = linemap_position_for_column (line_table, 11);
1866   const location_t c12 = linemap_position_for_column (line_table, 12);
1867   const location_t c13 = linemap_position_for_column (line_table, 13);
1868   const location_t c14 = linemap_position_for_column (line_table, 14);
1869   const location_t c21 = linemap_position_for_column (line_table, 21);
1870
1871   if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1872     return;
1873
1874   /* Use column 13 for the caret location, arbitrarily, to verify that we
1875      handle start != caret.  */
1876   const location_t aaa = make_location (c13, c12, c14);
1877   ASSERT_EQ (c13, get_pure_location (aaa));
1878   ASSERT_EQ (c12, get_start (aaa));
1879   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1880   ASSERT_EQ (c14, get_finish (aaa));
1881   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1882
1883   /* Make a location using a location with a range as the start-point.  */
1884   const location_t not_aaa = make_location (c11, aaa, c14);
1885   ASSERT_EQ (c11, get_pure_location (not_aaa));
1886   /* It should use the start location of the range, not store the range
1887      itself.  */
1888   ASSERT_EQ (c12, get_start (not_aaa));
1889   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1890   ASSERT_EQ (c14, get_finish (not_aaa));
1891   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1892
1893   /* Similarly, make a location with a range as the end-point.  */
1894   const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1895   ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1896   ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1897   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1898   ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1899   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1900   const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1901   /* It should use the finish location of the range, not store the range
1902      itself.  */
1903   ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1904   ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1905   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1906   ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1907   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1908 }
1909
1910 /* Verify reading of input files (e.g. for caret-based diagnostics).  */
1911
1912 static void
1913 test_reading_source_line ()
1914 {
1915   /* Create a tempfile and write some text to it.  */
1916   temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1917                         "01234567890123456789\n"
1918                         "This is the test text\n"
1919                         "This is the 3rd line");
1920
1921   /* Read back a specific line from the tempfile.  */
1922   char_span source_line = location_get_source_line (tmp.get_filename (), 3);
1923   ASSERT_TRUE (source_line);
1924   ASSERT_TRUE (source_line.get_buffer () != NULL);
1925   ASSERT_EQ (20, source_line.length ());
1926   ASSERT_TRUE (!strncmp ("This is the 3rd line",
1927                          source_line.get_buffer (), source_line.length ()));
1928
1929   source_line = location_get_source_line (tmp.get_filename (), 2);
1930   ASSERT_TRUE (source_line);
1931   ASSERT_TRUE (source_line.get_buffer () != NULL);
1932   ASSERT_EQ (21, source_line.length ());
1933   ASSERT_TRUE (!strncmp ("This is the test text",
1934                          source_line.get_buffer (), source_line.length ()));
1935
1936   source_line = location_get_source_line (tmp.get_filename (), 4);
1937   ASSERT_FALSE (source_line);
1938   ASSERT_TRUE (source_line.get_buffer () == NULL);
1939 }
1940
1941 /* Tests of lexing.  */
1942
1943 /* Verify that token TOK from PARSER has cpp_token_as_text
1944    equal to EXPECTED_TEXT.  */
1945
#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)     \
  SELFTEST_BEGIN_STMT                                           \
    unsigned char *actual_txt                                   \
      = cpp_token_as_text ((PARSER), (TOK));                    \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);   \
  SELFTEST_END_STMT
1951
1952 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1953    and ranges from EXP_START_COL to EXP_FINISH_COL.
1954    Use LOC as the effective location of the selftest.  */
1955
1956 static void
1957 assert_token_loc_eq (const location &loc,
1958                      const cpp_token *tok,
1959                      const char *exp_filename, int exp_linenum,
1960                      int exp_start_col, int exp_finish_col)
1961 {
1962   location_t tok_loc = tok->src_loc;
1963   ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1964   ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1965
1966   /* If location_t values are sufficiently high, then column numbers
1967      will be unavailable.  */
1968   if (!should_have_column_data_p (tok_loc))
1969     return;
1970
1971   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1972   source_range tok_range = get_range_from_loc (line_table, tok_loc);
1973   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1974   ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1975 }
1976
1977 /* Use assert_token_loc_eq to verify the TOK->src_loc, using
1978    SELFTEST_LOCATION as the effective location of the selftest.  */
1979
#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,     \
                            EXP_START_COL, EXP_FINISH_COL)      \
  assert_token_loc_eq (SELFTEST_LOCATION,                       \
                       (TOK),                                   \
                       (EXP_FILENAME),                          \
                       (EXP_LINENUM),                           \
                       (EXP_START_COL),                         \
                       (EXP_FINISH_COL))
1984
1985 /* Test of lexing a file using libcpp, verifying tokens and their
1986    location information.  */
1987
1988 static void
1989 test_lexer (const line_table_case &case_)
1990 {
1991   /* Create a tempfile and write some text to it.  */
1992   const char *content =
1993     /*00000000011111111112222222222333333.3333444444444.455555555556
1994       12345678901234567890123456789012345.6789012345678.901234567890.  */
1995     ("test_name /* c-style comment */\n"
1996      "                                  \"test literal\"\n"
1997      " // test c++-style comment\n"
1998      "   42\n");
1999   temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2000
2001   line_table_test ltt (case_);
2002
2003   cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2004
2005   const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2006   ASSERT_NE (fname, NULL);
2007
2008   /* Verify that we get the expected tokens back, with the correct
2009      location information.  */
2010
2011   location_t loc;
2012   const cpp_token *tok;
2013   tok = cpp_get_token_with_location (parser, &loc);
2014   ASSERT_NE (tok, NULL);
2015   ASSERT_EQ (tok->type, CPP_NAME);
2016   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2017   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2018
2019   tok = cpp_get_token_with_location (parser, &loc);
2020   ASSERT_NE (tok, NULL);
2021   ASSERT_EQ (tok->type, CPP_STRING);
2022   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2023   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2024
2025   tok = cpp_get_token_with_location (parser, &loc);
2026   ASSERT_NE (tok, NULL);
2027   ASSERT_EQ (tok->type, CPP_NUMBER);
2028   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2029   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2030
2031   tok = cpp_get_token_with_location (parser, &loc);
2032   ASSERT_NE (tok, NULL);
2033   ASSERT_EQ (tok->type, CPP_EOF);
2034
2035   cpp_finish (parser, NULL);
2036   cpp_destroy (parser);
2037 }
2038
2039 /* Forward decls.  */
2040
2041 struct lexer_test;
2042 class lexer_test_options;
2043
2044 /* A class for specifying options of a lexer_test.
2045    The "apply" vfunc is called during the lexer_test constructor.  */
2046
2047 class lexer_test_options
2048 {
2049  public:
2050   virtual void apply (lexer_test &) = 0;
2051 };
2052
/* Wrapper around a cpp_reader *, which calls cpp_finish and cpp_destroy
   in its dtor.

   This is needed by struct lexer_test to ensure that the cleanup of the
   cpp_reader happens *after* the cleanup of the temp_source_file.  */
2058
2059 class cpp_reader_ptr
2060 {
2061  public:
2062   cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2063
2064   ~cpp_reader_ptr ()
2065   {
2066     cpp_finish (m_ptr, NULL);
2067     cpp_destroy (m_ptr);
2068   }
2069
2070   operator cpp_reader * () const { return m_ptr; }
2071
2072  private:
2073   cpp_reader *m_ptr;
2074 };
2075
2076 /* A struct for writing lexer tests.  */
2077
2078 struct lexer_test
2079 {
2080   lexer_test (const line_table_case &case_, const char *content,
2081               lexer_test_options *options);
2082   ~lexer_test ();
2083
2084   const cpp_token *get_token ();
2085
2086   /* The ordering of these fields matters.
2087      The line_table_test must be first, since the cpp_reader_ptr
2088      uses it.
2089      The cpp_reader must be cleaned up *after* the temp_source_file
2090      since the filenames in input.c's input cache are owned by the
2091      cpp_reader; in particular, when ~temp_source_file evicts the
2092      filename the filenames must still be alive.  */
2093   line_table_test m_ltt;
2094   cpp_reader_ptr m_parser;
2095   temp_source_file m_tempfile;
2096   string_concat_db m_concats;
2097   bool m_implicitly_expect_EOF;
2098 };
2099
2100 /* Use an EBCDIC encoding for the execution charset, specifically
2101    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2102
2103    This exercises iconv integration within libcpp.
2104    Not every build of iconv supports the given charset,
2105    so we need to flag this error and handle it gracefully.  */
2106
2107 class ebcdic_execution_charset : public lexer_test_options
2108 {
2109  public:
2110   ebcdic_execution_charset () : m_num_iconv_errors (0)
2111     {
2112       gcc_assert (s_singleton == NULL);
2113       s_singleton = this;
2114     }
2115   ~ebcdic_execution_charset ()
2116     {
2117       gcc_assert (s_singleton == this);
2118       s_singleton = NULL;
2119     }
2120
2121   void apply (lexer_test &test) FINAL OVERRIDE
2122   {
2123     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2124     cpp_opts->narrow_charset = "IBM1047";
2125
2126     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2127     callbacks->diagnostic = on_diagnostic;
2128   }
2129
2130   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2131                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2132                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2133                              rich_location *richloc ATTRIBUTE_UNUSED,
2134                              const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2135     ATTRIBUTE_FPTR_PRINTF(5,0)
2136   {
2137     gcc_assert (s_singleton);
2138     /* Avoid exgettext from picking this up, it is translated in libcpp.  */
2139     const char *msg = "conversion from %s to %s not supported by iconv";
2140 #ifdef ENABLE_NLS
2141     msg = dgettext ("cpplib", msg);
2142 #endif
2143     /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2144        when the local iconv build doesn't support the conversion.  */
2145     if (strcmp (msgid, msg) == 0)
2146       {
2147         s_singleton->m_num_iconv_errors++;
2148         return true;
2149       }
2150
2151     /* Otherwise, we have an unexpected error.  */
2152     abort ();
2153   }
2154
2155   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2156
2157  private:
2158   static ebcdic_execution_charset *s_singleton;
2159   int m_num_iconv_errors;
2160 };
2161
2162 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2163
2164 /* A lexer_test_options subclass that records a list of diagnostic
2165    messages emitted by the lexer.  */
2166
2167 class lexer_diagnostic_sink : public lexer_test_options
2168 {
2169  public:
2170   lexer_diagnostic_sink ()
2171   {
2172     gcc_assert (s_singleton == NULL);
2173     s_singleton = this;
2174   }
2175   ~lexer_diagnostic_sink ()
2176   {
2177     gcc_assert (s_singleton == this);
2178     s_singleton = NULL;
2179
2180     int i;
2181     char *str;
2182     FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2183       free (str);
2184   }
2185
2186   void apply (lexer_test &test) FINAL OVERRIDE
2187   {
2188     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2189     callbacks->diagnostic = on_diagnostic;
2190   }
2191
2192   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2193                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2194                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2195                              rich_location *richloc ATTRIBUTE_UNUSED,
2196                              const char *msgid, va_list *ap)
2197     ATTRIBUTE_FPTR_PRINTF(5,0)
2198   {
2199     char *msg = xvasprintf (msgid, *ap);
2200     s_singleton->m_diagnostics.safe_push (msg);
2201     return true;
2202   }
2203
2204   auto_vec<char *> m_diagnostics;
2205
2206  private:
2207   static lexer_diagnostic_sink *s_singleton;
2208 };
2209
2210 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2211
2212 /* Constructor.  Override line_table with a new instance based on CASE_,
2213    and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
2214    start parsing the tempfile.  */
2215
2216 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2217                         lexer_test_options *options)
2218 : m_ltt (case_),
2219   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2220   /* Create a tempfile and write the text to it.  */
2221   m_tempfile (SELFTEST_LOCATION, ".c", content),
2222   m_concats (),
2223   m_implicitly_expect_EOF (true)
2224 {
2225   if (options)
2226     options->apply (*this);
2227
2228   cpp_init_iconv (m_parser);
2229
2230   /* Parse the file.  */
2231   const char *fname = cpp_read_main_file (m_parser,
2232                                           m_tempfile.get_filename ());
2233   ASSERT_NE (fname, NULL);
2234 }
2235
2236 /* Destructor.  By default, verify that the next token in m_parser is EOF.  */
2237
2238 lexer_test::~lexer_test ()
2239 {
2240   location_t loc;
2241   const cpp_token *tok;
2242
2243   if (m_implicitly_expect_EOF)
2244     {
2245       tok = cpp_get_token_with_location (m_parser, &loc);
2246       ASSERT_NE (tok, NULL);
2247       ASSERT_EQ (tok->type, CPP_EOF);
2248     }
2249 }
2250
2251 /* Get the next token from m_parser.  */
2252
2253 const cpp_token *
2254 lexer_test::get_token ()
2255 {
2256   location_t loc;
2257   const cpp_token *tok;
2258
2259   tok = cpp_get_token_with_location (m_parser, &loc);
2260   ASSERT_NE (tok, NULL);
2261   return tok;
2262 }
2263
2264 /* Verify that locations within string literals are correctly handled.  */
2265
2266 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2267    using the string concatenation database for TEST.
2268
2269    Assert that the character at index IDX is on EXPECTED_LINE,
2270    and that it begins at column EXPECTED_START_COL and ends at
2271    EXPECTED_FINISH_COL (unless the locations are beyond
2272    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2273    columns).  */
2274
2275 static void
2276 assert_char_at_range (const location &loc,
2277                       lexer_test& test,
2278                       location_t strloc, enum cpp_ttype type, int idx,
2279                       int expected_line, int expected_start_col,
2280                       int expected_finish_col)
2281 {
2282   cpp_reader *pfile = test.m_parser;
2283   string_concat_db *concats = &test.m_concats;
2284
2285   source_range actual_range = source_range();
2286   const char *err
2287     = get_source_range_for_char (pfile, concats, strloc, type, idx,
2288                                  &actual_range);
2289   if (should_have_column_data_p (strloc))
2290     ASSERT_EQ_AT (loc, NULL, err);
2291   else
2292     {
2293       ASSERT_STREQ_AT (loc,
2294                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2295                        err);
2296       return;
2297     }
2298
2299   int actual_start_line = LOCATION_LINE (actual_range.m_start);
2300   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2301   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2302   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2303
2304   if (should_have_column_data_p (actual_range.m_start))
2305     {
2306       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2307       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2308     }
2309   if (should_have_column_data_p (actual_range.m_finish))
2310     {
2311       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2312       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2313     }
2314 }
2315
2316 /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2317    the effective location of any errors.  */
2318
#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX,             \
                             EXPECTED_LINE, EXPECTED_START_COL,         \
                             EXPECTED_FINISH_COL)                       \
  assert_char_at_range (SELFTEST_LOCATION,                              \
                        (LEXER_TEST),                                   \
                        (STRLOC),                                       \
                        (TYPE),                                         \
                        (IDX),                                          \
                        (EXPECTED_LINE),                                \
                        (EXPECTED_START_COL),                           \
                        (EXPECTED_FINISH_COL))
2324
2325 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2326    using the string concatenation database for TEST.
2327
2328    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES.  */
2329
2330 static void
2331 assert_num_substring_ranges (const location &loc,
2332                              lexer_test& test,
2333                              location_t strloc,
2334                              enum cpp_ttype type,
2335                              int expected_num_ranges)
2336 {
2337   cpp_reader *pfile = test.m_parser;
2338   string_concat_db *concats = &test.m_concats;
2339
2340   int actual_num_ranges = -1;
2341   const char *err
2342     = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2343                                            &actual_num_ranges);
2344   if (should_have_column_data_p (strloc))
2345     ASSERT_EQ_AT (loc, NULL, err);
2346   else
2347     {
2348       ASSERT_STREQ_AT (loc,
2349                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2350                        err);
2351       return;
2352     }
2353   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2354 }
2355
2356 /* Macro for calling assert_num_substring_ranges, supplying
2357    SELFTEST_LOCATION for the effective location of any errors.  */
2358
#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,   \
                                    EXPECTED_NUM_RANGES)        \
  assert_num_substring_ranges (SELFTEST_LOCATION,               \
                               (LEXER_TEST),                    \
                               (STRLOC),                        \
                               (TYPE),                          \
                               (EXPECTED_NUM_RANGES))
2363
2364
2365 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2366    returns an error (using the string concatenation database for TEST).  */
2367
2368 static void
2369 assert_has_no_substring_ranges (const location &loc,
2370                                 lexer_test& test,
2371                                 location_t strloc,
2372                                 enum cpp_ttype type,
2373                                 const char *expected_err)
2374 {
2375   cpp_reader *pfile = test.m_parser;
2376   string_concat_db *concats = &test.m_concats;
2377   cpp_substring_ranges ranges;
2378   const char *actual_err
2379     = get_substring_ranges_for_loc (pfile, concats, strloc,
2380                                     type, ranges);
2381   if (should_have_column_data_p (strloc))
2382     ASSERT_STREQ_AT (loc, expected_err, actual_err);
2383   else
2384     ASSERT_STREQ_AT (loc,
2385                      "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2386                      actual_err);
2387 }
2388
/* Macro for calling assert_has_no_substring_ranges, supplying
   SELFTEST_LOCATION for the effective location of any errors.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)   \
  assert_has_no_substring_ranges (SELFTEST_LOCATION,                    \
                                  (LEXER_TEST),                         \
                                  (STRLOC),                             \
                                  (TYPE),                               \
                                  (ERR))
2392
2393 /* Lex a simple string literal.  Verify the substring location data, before
2394    and after running cpp_interpret_string on it.  */
2395
2396 static void
2397 test_lexer_string_locations_simple (const line_table_case &case_)
2398 {
2399   /* Digits 0-9 (with 0 at column 10), the simple way.
2400      ....................000000000.11111111112.2222222223333333333
2401      ....................123456789.01234567890.1234567890123456789
2402      We add a trailing comment to ensure that we correctly locate
2403      the end of the string literal token.  */
2404   const char *content = "        \"0123456789\" /* not a string */\n";
2405   lexer_test test (case_, content, NULL);
2406
2407   /* Verify that we get the expected token back, with the correct
2408      location information.  */
2409   const cpp_token *tok = test.get_token ();
2410   ASSERT_EQ (tok->type, CPP_STRING);
2411   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2412   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2413
2414   /* At this point in lexing, the quote characters are treated as part of
2415      the string (they are stripped off by cpp_interpret_string).  */
2416
2417   ASSERT_EQ (tok->val.str.len, 12);
2418
2419   /* Verify that cpp_interpret_string works.  */
2420   cpp_string dst_string;
2421   const enum cpp_ttype type = CPP_STRING;
2422   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2423                                       &dst_string, type);
2424   ASSERT_TRUE (result);
2425   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2426   free (const_cast <unsigned char *> (dst_string.text));
2427
2428   /* Verify ranges of individual characters.  This no longer includes the
2429      opening quote, but does include the closing quote.  */
2430   for (int i = 0; i <= 10; i++)
2431     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2432                           10 + i, 10 + i);
2433
2434   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2435 }
2436
2437 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2438    encoding.  */
2439
2440 static void
2441 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2442 {
2443   /* EBCDIC support requires iconv.  */
2444   if (!HAVE_ICONV)
2445     return;
2446
2447   /* Digits 0-9 (with 0 at column 10), the simple way.
2448      ....................000000000.11111111112.2222222223333333333
2449      ....................123456789.01234567890.1234567890123456789
2450      We add a trailing comment to ensure that we correctly locate
2451      the end of the string literal token.  */
2452   const char *content = "        \"0123456789\" /* not a string */\n";
2453   ebcdic_execution_charset use_ebcdic;
2454   lexer_test test (case_, content, &use_ebcdic);
2455
2456   /* Verify that we get the expected token back, with the correct
2457      location information.  */
2458   const cpp_token *tok = test.get_token ();
2459   ASSERT_EQ (tok->type, CPP_STRING);
2460   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2461   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2462
2463   /* At this point in lexing, the quote characters are treated as part of
2464      the string (they are stripped off by cpp_interpret_string).  */
2465
2466   ASSERT_EQ (tok->val.str.len, 12);
2467
2468   /* The remainder of the test requires an iconv implementation that
2469      can convert from UTF-8 to the EBCDIC encoding requested above.  */
2470   if (use_ebcdic.iconv_errors_occurred_p ())
2471     return;
2472
2473   /* Verify that cpp_interpret_string works.  */
2474   cpp_string dst_string;
2475   const enum cpp_ttype type = CPP_STRING;
2476   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2477                                       &dst_string, type);
2478   ASSERT_TRUE (result);
2479   /* We should now have EBCDIC-encoded text, specifically
2480      IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2481      The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
2482   ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2483                 (const char *)dst_string.text);
2484   free (const_cast <unsigned char *> (dst_string.text));
2485
2486   /* Verify that we don't attempt to record substring location information
2487      for such cases.  */
2488   ASSERT_HAS_NO_SUBSTRING_RANGES
2489     (test, tok->src_loc, type,
2490      "execution character set != source character set");
2491 }
2492
2493 /* Lex a string literal containing a hex-escaped character.
2494    Verify the substring location data, before and after running
2495    cpp_interpret_string on it.  */
2496
2497 static void
2498 test_lexer_string_locations_hex (const line_table_case &case_)
2499 {
2500   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2501      and with a space in place of digit 6, to terminate the escaped
2502      hex code.
2503      ....................000000000.111111.11112222.
2504      ....................123456789.012345.67890123.  */
2505   const char *content = "        \"01234\\x35 789\"\n";
2506   lexer_test test (case_, content, NULL);
2507
2508   /* Verify that we get the expected token back, with the correct
2509      location information.  */
2510   const cpp_token *tok = test.get_token ();
2511   ASSERT_EQ (tok->type, CPP_STRING);
2512   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2513   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2514
2515   /* At this point in lexing, the quote characters are treated as part of
2516      the string (they are stripped off by cpp_interpret_string).  */
2517   ASSERT_EQ (tok->val.str.len, 15);
2518
2519   /* Verify that cpp_interpret_string works.  */
2520   cpp_string dst_string;
2521   const enum cpp_ttype type = CPP_STRING;
2522   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2523                                       &dst_string, type);
2524   ASSERT_TRUE (result);
2525   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2526   free (const_cast <unsigned char *> (dst_string.text));
2527
2528   /* Verify ranges of individual characters.  This no longer includes the
2529      opening quote, but does include the closing quote.  */
2530   for (int i = 0; i <= 4; i++)
2531     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2532   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2533   for (int i = 6; i <= 10; i++)
2534     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2535
2536   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2537 }
2538
2539 /* Lex a string literal containing an octal-escaped character.
2540    Verify the substring location data after running cpp_interpret_string
2541    on it.  */
2542
2543 static void
2544 test_lexer_string_locations_oct (const line_table_case &case_)
2545 {
2546   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2547      and with a space in place of digit 6, to terminate the escaped
2548      octal code.
2549      ....................000000000.111111.11112222.2222223333333333444
2550      ....................123456789.012345.67890123.4567890123456789012  */
2551   const char *content = "        \"01234\\065 789\" /* not a string */\n";
2552   lexer_test test (case_, content, NULL);
2553
2554   /* Verify that we get the expected token back, with the correct
2555      location information.  */
2556   const cpp_token *tok = test.get_token ();
2557   ASSERT_EQ (tok->type, CPP_STRING);
2558   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2559
2560   /* Verify that cpp_interpret_string works.  */
2561   cpp_string dst_string;
2562   const enum cpp_ttype type = CPP_STRING;
2563   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2564                                       &dst_string, type);
2565   ASSERT_TRUE (result);
2566   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2567   free (const_cast <unsigned char *> (dst_string.text));
2568
2569   /* Verify ranges of individual characters.  This no longer includes the
2570      opening quote, but does include the closing quote.  */
2571   for (int i = 0; i < 5; i++)
2572     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2573   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2574   for (int i = 6; i <= 10; i++)
2575     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2576
2577   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2578 }
2579
2580 /* Test of string literal containing letter escapes.  */
2581
2582 static void
2583 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2584 {
2585   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2586      .....................000000000.1.11111.1.1.11222.22222223333333
2587      .....................123456789.0.12345.6.7.89012.34567890123456.  */
2588   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2589   lexer_test test (case_, content, NULL);
2590
2591   /* Verify that we get the expected tokens back.  */
2592   const cpp_token *tok = test.get_token ();
2593   ASSERT_EQ (tok->type, CPP_STRING);
2594   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2595
2596   /* Verify ranges of individual characters. */
2597   /* "\t".  */
2598   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2599                         0, 1, 10, 11);
2600   /* "foo". */
2601   for (int i = 1; i <= 3; i++)
2602     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2603                           i, 1, 11 + i, 11 + i);
2604   /* "\\" and "\n".  */
2605   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2606                         4, 1, 15, 16);
2607   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2608                         5, 1, 17, 18);
2609
2610   /* "bar" and closing quote for nul-terminator.  */
2611   for (int i = 6; i <= 9; i++)
2612     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2613                           i, 1, 13 + i, 13 + i);
2614
2615   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2616 }
2617
2618 /* Another test of a string literal containing a letter escape.
2619    Based on string seen in
2620      printf ("%-%\n");
2621    in gcc.dg/format/c90-printf-1.c.  */
2622
2623 static void
2624 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2625 {
2626   /* .....................000000000.1111.11.1111.22222222223.
2627      .....................123456789.0123.45.6789.01234567890.  */
2628   const char *content = ("        \"%-%\\n\" /* non-str */\n");
2629   lexer_test test (case_, content, NULL);
2630
2631   /* Verify that we get the expected tokens back.  */
2632   const cpp_token *tok = test.get_token ();
2633   ASSERT_EQ (tok->type, CPP_STRING);
2634   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2635
2636   /* Verify ranges of individual characters. */
2637   /* "%-%".  */
2638   for (int i = 0; i < 3; i++)
2639     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2640                           i, 1, 10 + i, 10 + i);
2641   /* "\n".  */
2642   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2643                         3, 1, 13, 14);
2644
2645   /* Closing quote for nul-terminator.  */
2646   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2647                         4, 1, 15, 15);
2648
2649   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2650 }
2651
2652 /* Lex a string literal containing UCN 4 characters.
2653    Verify the substring location data after running cpp_interpret_string
2654    on it.  */
2655
2656 static void
2657 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2658 {
2659   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2660      as UCN 4.
2661      ....................000000000.111111.111122.222222223.33333333344444
2662      ....................123456789.012345.678901.234567890.12345678901234  */
2663   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
2664   lexer_test test (case_, content, NULL);
2665
2666   /* Verify that we get the expected token back, with the correct
2667      location information.  */
2668   const cpp_token *tok = test.get_token ();
2669   ASSERT_EQ (tok->type, CPP_STRING);
2670   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2671
2672   /* Verify that cpp_interpret_string works.
2673      The string should be encoded in the execution character
2674      set.  Assuming that that is UTF-8, we should have the following:
2675      -----------  ----  -----  -------  ----------------
2676      Byte offset  Byte  Octal  Unicode  Source Column(s)
2677      -----------  ----  -----  -------  ----------------
2678      0            0x30         '0'      10
2679      1            0x31         '1'      11
2680      2            0x32         '2'      12
2681      3            0x33         '3'      13
2682      4            0x34         '4'      14
2683      5            0xE2  \342   U+2174   15-20
2684      6            0x85  \205    (cont)  15-20
2685      7            0xB4  \264    (cont)  15-20
2686      8            0xE2  \342   U+2175   21-26
2687      9            0x85  \205    (cont)  21-26
2688      10           0xB5  \265    (cont)  21-26
2689      11           0x37         '7'      27
2690      12           0x38         '8'      28
2691      13           0x39         '9'      29
2692      14           0x00                  30 (closing quote)
2693      -----------  ----  -----  -------  ---------------.  */
2694
2695   cpp_string dst_string;
2696   const enum cpp_ttype type = CPP_STRING;
2697   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2698                                       &dst_string, type);
2699   ASSERT_TRUE (result);
2700   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2701                 (const char *)dst_string.text);
2702   free (const_cast <unsigned char *> (dst_string.text));
2703
2704   /* Verify ranges of individual characters.  This no longer includes the
2705      opening quote, but does include the closing quote.
2706      '01234'.  */
2707   for (int i = 0; i <= 4; i++)
2708     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2709   /* U+2174.  */
2710   for (int i = 5; i <= 7; i++)
2711     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2712   /* U+2175.  */
2713   for (int i = 8; i <= 10; i++)
2714     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2715   /* '789' and nul terminator  */
2716   for (int i = 11; i <= 14; i++)
2717     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2718
2719   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2720 }
2721
2722 /* Lex a string literal containing UCN 8 characters.
2723    Verify the substring location data after running cpp_interpret_string
2724    on it.  */
2725
2726 static void
2727 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2728 {
2729   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2730      ....................000000000.111111.1111222222.2222333333333.344444
2731      ....................123456789.012345.6789012345.6789012345678.901234  */
2732   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
2733   lexer_test test (case_, content, NULL);
2734
2735   /* Verify that we get the expected token back, with the correct
2736      location information.  */
2737   const cpp_token *tok = test.get_token ();
2738   ASSERT_EQ (tok->type, CPP_STRING);
2739   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2740                            "\"01234\\U00002174\\U00002175789\"");
2741
2742   /* Verify that cpp_interpret_string works.
2743      The UTF-8 encoding of the string is identical to that from
2744      the ucn4 testcase above; the only difference is the column
2745      locations.  */
2746   cpp_string dst_string;
2747   const enum cpp_ttype type = CPP_STRING;
2748   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2749                                       &dst_string, type);
2750   ASSERT_TRUE (result);
2751   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2752                 (const char *)dst_string.text);
2753   free (const_cast <unsigned char *> (dst_string.text));
2754
2755   /* Verify ranges of individual characters.  This no longer includes the
2756      opening quote, but does include the closing quote.
2757      '01234'.  */
2758   for (int i = 0; i <= 4; i++)
2759     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2760   /* U+2174.  */
2761   for (int i = 5; i <= 7; i++)
2762     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2763   /* U+2175.  */
2764   for (int i = 8; i <= 10; i++)
2765     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2766   /* '789' at columns 35-37  */
2767   for (int i = 11; i <= 13; i++)
2768     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2769   /* Closing quote/nul-terminator at column 38.  */
2770   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2771
2772   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2773 }
2774
/* Read the four bytes at PTR_BE_VALUE as a big-endian 32-bit quantity
   and return it as a host-endian value.  The bytes are read one at a
   time, most significant first.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t host_value = 0;

  /* Shift in each byte below the ones already accumulated.  */
  for (int i = 0; i < 4; i++)
    host_value = (host_value << 8) | (uint32_t) bytes[i];

  return host_value;
}
2786
2787 /* Lex a wide string literal and verify that attempts to read substring
2788    location data from it fail gracefully.  */
2789
2790 static void
2791 test_lexer_string_locations_wide_string (const line_table_case &case_)
2792 {
2793   /* Digits 0-9.
2794      ....................000000000.11111111112.22222222233333
2795      ....................123456789.01234567890.12345678901234  */
2796   const char *content = "       L\"0123456789\" /* non-str */\n";
2797   lexer_test test (case_, content, NULL);
2798
2799   /* Verify that we get the expected token back, with the correct
2800      location information.  */
2801   const cpp_token *tok = test.get_token ();
2802   ASSERT_EQ (tok->type, CPP_WSTRING);
2803   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2804
2805   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
2806   cpp_string dst_string;
2807   const enum cpp_ttype type = CPP_WSTRING;
2808   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2809                                       &dst_string, type);
2810   ASSERT_TRUE (result);
2811   /* The cpp_reader defaults to big-endian with
2812      CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2813      now be encoded as UTF-32BE.  */
2814   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2815   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2816   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2817   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2818   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2819   free (const_cast <unsigned char *> (dst_string.text));
2820
2821   /* We don't yet support generating substring location information
2822      for L"" strings.  */
2823   ASSERT_HAS_NO_SUBSTRING_RANGES
2824     (test, tok->src_loc, type,
2825      "execution character set != source character set");
2826 }
2827
/* Read the two bytes at PTR_BE_VALUE as a big-endian 16-bit quantity
   and return it as a host-endian value.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high_byte = bytes[0];
  const unsigned int low_byte = bytes[1];

  /* The first byte in memory is the most significant one.  */
  return (uint16_t) ((high_byte << 8) | low_byte);
}
2836
2837 /* Lex a u"" string literal and verify that attempts to read substring
2838    location data from it fail gracefully.  */
2839
2840 static void
2841 test_lexer_string_locations_string16 (const line_table_case &case_)
2842 {
2843   /* Digits 0-9.
2844      ....................000000000.11111111112.22222222233333
2845      ....................123456789.01234567890.12345678901234  */
2846   const char *content = "       u\"0123456789\" /* non-str */\n";
2847   lexer_test test (case_, content, NULL);
2848
2849   /* Verify that we get the expected token back, with the correct
2850      location information.  */
2851   const cpp_token *tok = test.get_token ();
2852   ASSERT_EQ (tok->type, CPP_STRING16);
2853   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2854
2855   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
2856   cpp_string dst_string;
2857   const enum cpp_ttype type = CPP_STRING16;
2858   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2859                                       &dst_string, type);
2860   ASSERT_TRUE (result);
2861
2862   /* The cpp_reader defaults to big-endian, so dst_string should
2863      now be encoded as UTF-16BE.  */
2864   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2865   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2866   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2867   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2868   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2869   free (const_cast <unsigned char *> (dst_string.text));
2870
2871   /* We don't yet support generating substring location information
2872      for L"" strings.  */
2873   ASSERT_HAS_NO_SUBSTRING_RANGES
2874     (test, tok->src_loc, type,
2875      "execution character set != source character set");
2876 }
2877
2878 /* Lex a U"" string literal and verify that attempts to read substring
2879    location data from it fail gracefully.  */
2880
2881 static void
2882 test_lexer_string_locations_string32 (const line_table_case &case_)
2883 {
2884   /* Digits 0-9.
2885      ....................000000000.11111111112.22222222233333
2886      ....................123456789.01234567890.12345678901234  */
2887   const char *content = "       U\"0123456789\" /* non-str */\n";
2888   lexer_test test (case_, content, NULL);
2889
2890   /* Verify that we get the expected token back, with the correct
2891      location information.  */
2892   const cpp_token *tok = test.get_token ();
2893   ASSERT_EQ (tok->type, CPP_STRING32);
2894   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2895
2896   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
2897   cpp_string dst_string;
2898   const enum cpp_ttype type = CPP_STRING32;
2899   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2900                                       &dst_string, type);
2901   ASSERT_TRUE (result);
2902
2903   /* The cpp_reader defaults to big-endian, so dst_string should
2904      now be encoded as UTF-32BE.  */
2905   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2906   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2907   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2908   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2909   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2910   free (const_cast <unsigned char *> (dst_string.text));
2911
2912   /* We don't yet support generating substring location information
2913      for L"" strings.  */
2914   ASSERT_HAS_NO_SUBSTRING_RANGES
2915     (test, tok->src_loc, type,
2916      "execution character set != source character set");
2917 }
2918
2919 /* Lex a u8-string literal.
2920    Verify the substring location data after running cpp_interpret_string
2921    on it.  */
2922
2923 static void
2924 test_lexer_string_locations_u8 (const line_table_case &case_)
2925 {
2926   /* Digits 0-9.
2927      ....................000000000.11111111112.22222222233333
2928      ....................123456789.01234567890.12345678901234  */
2929   const char *content = "      u8\"0123456789\" /* non-str */\n";
2930   lexer_test test (case_, content, NULL);
2931
2932   /* Verify that we get the expected token back, with the correct
2933      location information.  */
2934   const cpp_token *tok = test.get_token ();
2935   ASSERT_EQ (tok->type, CPP_UTF8STRING);
2936   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2937
2938   /* Verify that cpp_interpret_string works.  */
2939   cpp_string dst_string;
2940   const enum cpp_ttype type = CPP_STRING;
2941   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2942                                       &dst_string, type);
2943   ASSERT_TRUE (result);
2944   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2945   free (const_cast <unsigned char *> (dst_string.text));
2946
2947   /* Verify ranges of individual characters.  This no longer includes the
2948      opening quote, but does include the closing quote.  */
2949   for (int i = 0; i <= 10; i++)
2950     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2951 }
2952
2953 /* Lex a string literal containing UTF-8 source characters.
2954    Verify the substring location data after running cpp_interpret_string
2955    on it.  */
2956
2957 static void
2958 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2959 {
2960  /* This string literal is written out to the source file as UTF-8,
2961     and is of the form "before mojibake after", where "mojibake"
2962     is written as the following four unicode code points:
2963        U+6587 CJK UNIFIED IDEOGRAPH-6587
2964        U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2965        U+5316 CJK UNIFIED IDEOGRAPH-5316
2966        U+3051 HIRAGANA LETTER KE.
2967      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2968      "before" and "after" are 1 byte per unicode character.
2969
2970      The numbering shown are "columns", which are *byte* numbers within
2971      the line, rather than unicode character numbers.
2972
2973      .................... 000000000.1111111.
2974      .................... 123456789.0123456.  */
2975   const char *content = ("        \"before "
2976                          /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2977                               UTF-8: 0xE6 0x96 0x87
2978                               C octal escaped UTF-8: \346\226\207
2979                             "column" numbers: 17-19.  */
2980                          "\346\226\207"
2981
2982                          /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2983                               UTF-8: 0xE5 0xAD 0x97
2984                               C octal escaped UTF-8: \345\255\227
2985                             "column" numbers: 20-22.  */
2986                          "\345\255\227"
2987
2988                          /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2989                               UTF-8: 0xE5 0x8C 0x96
2990                               C octal escaped UTF-8: \345\214\226
2991                             "column" numbers: 23-25.  */
2992                          "\345\214\226"
2993
2994                          /* U+3051 HIRAGANA LETTER KE
2995                               UTF-8: 0xE3 0x81 0x91
2996                               C octal escaped UTF-8: \343\201\221
2997                             "column" numbers: 26-28.  */
2998                          "\343\201\221"
2999
3000                          /* column numbers 29 onwards
3001                           2333333.33334444444444
3002                           9012345.67890123456789. */
3003                          " after\" /* non-str */\n");
3004   lexer_test test (case_, content, NULL);
3005
3006   /* Verify that we get the expected token back, with the correct
3007      location information.  */
3008   const cpp_token *tok = test.get_token ();
3009   ASSERT_EQ (tok->type, CPP_STRING);
3010   ASSERT_TOKEN_AS_TEXT_EQ
3011     (test.m_parser, tok,
3012      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3013
3014   /* Verify that cpp_interpret_string works.  */
3015   cpp_string dst_string;
3016   const enum cpp_ttype type = CPP_STRING;
3017   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3018                                       &dst_string, type);
3019   ASSERT_TRUE (result);
3020   ASSERT_STREQ
3021     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3022      (const char *)dst_string.text);
3023   free (const_cast <unsigned char *> (dst_string.text));
3024
3025   /* Verify ranges of individual characters.  This no longer includes the
3026      opening quote, but does include the closing quote.
3027      Assuming that both source and execution encodings are UTF-8, we have
3028      a run of 25 octets in each, plus the NUL terminator.  */
3029   for (int i = 0; i < 25; i++)
3030     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3031   /* NUL-terminator should use the closing quote at column 35.  */
3032   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3033
3034   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3035 }
3036
3037 /* Test of string literal concatenation.  */
3038
3039 static void
3040 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3041 {
3042   /* Digits 0-9.
3043      .....................000000000.111111.11112222222222
3044      .....................123456789.012345.67890123456789.  */
3045   const char *content = ("        \"01234\" /* non-str */\n"
3046                          "        \"56789\" /* non-str */\n");
3047   lexer_test test (case_, content, NULL);
3048
3049   location_t input_locs[2];
3050
3051   /* Verify that we get the expected tokens back.  */
3052   auto_vec <cpp_string> input_strings;
3053   const cpp_token *tok_a = test.get_token ();
3054   ASSERT_EQ (tok_a->type, CPP_STRING);
3055   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3056   input_strings.safe_push (tok_a->val.str);
3057   input_locs[0] = tok_a->src_loc;
3058
3059   const cpp_token *tok_b = test.get_token ();
3060   ASSERT_EQ (tok_b->type, CPP_STRING);
3061   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3062   input_strings.safe_push (tok_b->val.str);
3063   input_locs[1] = tok_b->src_loc;
3064
3065   /* Verify that cpp_interpret_string works.  */
3066   cpp_string dst_string;
3067   const enum cpp_ttype type = CPP_STRING;
3068   bool result = cpp_interpret_string (test.m_parser,
3069                                       input_strings.address (), 2,
3070                                       &dst_string, type);
3071   ASSERT_TRUE (result);
3072   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3073   free (const_cast <unsigned char *> (dst_string.text));
3074
3075   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3076   test.m_concats.record_string_concatenation (2, input_locs);
3077
3078   location_t initial_loc = input_locs[0];
3079
3080   /* "01234" on line 1.  */
3081   for (int i = 0; i <= 4; i++)
3082     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3083   /* "56789" in line 2, plus its closing quote for the nul terminator.  */
3084   for (int i = 5; i <= 10; i++)
3085     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3086
3087   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3088 }
3089
3090 /* Another test of string literal concatenation.  */
3091
3092 static void
3093 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3094 {
3095   /* Digits 0-9.
3096      .....................000000000.111.11111112222222
3097      .....................123456789.012.34567890123456.  */
3098   const char *content = ("        \"01\" /* non-str */\n"
3099                          "        \"23\" /* non-str */\n"
3100                          "        \"45\" /* non-str */\n"
3101                          "        \"67\" /* non-str */\n"
3102                          "        \"89\" /* non-str */\n");
3103   lexer_test test (case_, content, NULL);
3104
3105   auto_vec <cpp_string> input_strings;
3106   location_t input_locs[5];
3107
3108   /* Verify that we get the expected tokens back.  */
3109   for (int i = 0; i < 5; i++)
3110     {
3111       const cpp_token *tok = test.get_token ();
3112       ASSERT_EQ (tok->type, CPP_STRING);
3113       input_strings.safe_push (tok->val.str);
3114       input_locs[i] = tok->src_loc;
3115     }
3116
3117   /* Verify that cpp_interpret_string works.  */
3118   cpp_string dst_string;
3119   const enum cpp_ttype type = CPP_STRING;
3120   bool result = cpp_interpret_string (test.m_parser,
3121                                       input_strings.address (), 5,
3122                                       &dst_string, type);
3123   ASSERT_TRUE (result);
3124   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3125   free (const_cast <unsigned char *> (dst_string.text));
3126
3127   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3128   test.m_concats.record_string_concatenation (5, input_locs);
3129
3130   location_t initial_loc = input_locs[0];
3131
3132   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3133      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3134      and expect get_source_range_for_substring to fail.
3135      However, for a string concatenation test, we can have a case
3136      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3137      but subsequent strings can be after it.
3138      Attempting to detect this within assert_char_at_range
3139      would overcomplicate the logic for the common test cases, so
3140      we detect it here.  */
3141   if (should_have_column_data_p (input_locs[0])
3142       && !should_have_column_data_p (input_locs[4]))
3143     {
3144       /* Verify that get_source_range_for_substring gracefully rejects
3145          this case.  */
3146       source_range actual_range;
3147       const char *err
3148         = get_source_range_for_char (test.m_parser, &test.m_concats,
3149                                      initial_loc, type, 0, &actual_range);
3150       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3151       return;
3152     }
3153
3154   for (int i = 0; i < 5; i++)
3155     for (int j = 0; j < 2; j++)
3156       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3157                             i + 1, 10 + j, 10 + j);
3158
3159   /* NUL-terminator should use the final closing quote at line 5 column 12.  */
3160   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3161
3162   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3163 }
3164
3165 /* Another test of string literal concatenation, this time combined with
3166    various kinds of escaped characters.  */
3167
3168 static void
3169 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3170 {
3171   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3172      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
3173   const char *content
3174     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3175        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3176     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
3177   lexer_test test (case_, content, NULL);
3178
3179   auto_vec <cpp_string> input_strings;
3180   location_t input_locs[4];
3181
3182   /* Verify that we get the expected tokens back.  */
3183   for (int i = 0; i < 4; i++)
3184     {
3185       const cpp_token *tok = test.get_token ();
3186       ASSERT_EQ (tok->type, CPP_STRING);
3187       input_strings.safe_push (tok->val.str);
3188       input_locs[i] = tok->src_loc;
3189     }
3190
3191   /* Verify that cpp_interpret_string works.  */
3192   cpp_string dst_string;
3193   const enum cpp_ttype type = CPP_STRING;
3194   bool result = cpp_interpret_string (test.m_parser,
3195                                       input_strings.address (), 4,
3196                                       &dst_string, type);
3197   ASSERT_TRUE (result);
3198   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3199   free (const_cast <unsigned char *> (dst_string.text));
3200
3201   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3202   test.m_concats.record_string_concatenation (4, input_locs);
3203
3204   location_t initial_loc = input_locs[0];
3205
3206   for (int i = 0; i <= 4; i++)
3207     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3208   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3209   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3210   for (int i = 7; i <= 9; i++)
3211     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3212
3213   /* NUL-terminator should use the location of the final closing quote.  */
3214   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3215
3216   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3217 }
3218
3219 /* Test of string literal in a macro.  */
3220
3221 static void
3222 test_lexer_string_locations_macro (const line_table_case &case_)
3223 {
3224   /* Digits 0-9.
3225      .....................0000000001111111111.22222222223.
3226      .....................1234567890123456789.01234567890.  */
3227   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
3228                          "  MACRO");
3229   lexer_test test (case_, content, NULL);
3230
3231   /* Verify that we get the expected tokens back.  */
3232   const cpp_token *tok = test.get_token ();
3233   ASSERT_EQ (tok->type, CPP_PADDING);
3234
3235   tok = test.get_token ();
3236   ASSERT_EQ (tok->type, CPP_STRING);
3237   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3238
3239   /* Verify ranges of individual characters.  We ought to
3240      see columns within the macro definition.  */
3241   for (int i = 0; i <= 10; i++)
3242     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3243                           i, 1, 20 + i, 20 + i);
3244
3245   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3246
3247   tok = test.get_token ();
3248   ASSERT_EQ (tok->type, CPP_PADDING);
3249 }
3250
3251 /* Test of stringification of a macro argument.  */
3252
3253 static void
3254 test_lexer_string_locations_stringified_macro_argument
3255   (const line_table_case &case_)
3256 {
3257   /* .....................000000000111111111122222222223.
3258      .....................123456789012345678901234567890.  */
3259   const char *content = ("#define MACRO(X) #X /* non-str */\n"
3260                          "MACRO(foo)\n");
3261   lexer_test test (case_, content, NULL);
3262
3263   /* Verify that we get the expected token back.  */
3264   const cpp_token *tok = test.get_token ();
3265   ASSERT_EQ (tok->type, CPP_PADDING);
3266
3267   tok = test.get_token ();
3268   ASSERT_EQ (tok->type, CPP_STRING);
3269   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3270
3271   /* We don't support getting the location of a stringified macro
3272      argument.  Verify that it fails gracefully.  */
3273   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3274                                   "cpp_interpret_string_1 failed");
3275
3276   tok = test.get_token ();
3277   ASSERT_EQ (tok->type, CPP_PADDING);
3278
3279   tok = test.get_token ();
3280   ASSERT_EQ (tok->type, CPP_PADDING);
3281 }
3282
3283 /* Ensure that we are fail gracefully if something attempts to pass
3284    in a location that isn't a string literal token.  Seen on this code:
3285
3286      const char a[] = " %d ";
3287      __builtin_printf (a, 0.5);
3288                        ^
3289
3290    when c-format.c erroneously used the indicated one-character
3291    location as the format string location, leading to a read past the
3292    end of a string buffer in cpp_interpret_string_1.  */
3293
3294 static void
3295 test_lexer_string_locations_non_string (const line_table_case &case_)
3296 {
3297   /* .....................000000000111111111122222222223.
3298      .....................123456789012345678901234567890.  */
3299   const char *content = ("         a\n");
3300   lexer_test test (case_, content, NULL);
3301
3302   /* Verify that we get the expected token back.  */
3303   const cpp_token *tok = test.get_token ();
3304   ASSERT_EQ (tok->type, CPP_NAME);
3305   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3306
3307   /* At this point, libcpp is attempting to interpret the name as a
3308      string literal, despite it not starting with a quote.  We don't detect
3309      that, but we should at least fail gracefully.  */
3310   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3311                                   "cpp_interpret_string_1 failed");
3312 }
3313
3314 /* Ensure that we can read substring information for a token which
3315    starts in one linemap and ends in another .  Adapted from
3316    gcc.dg/cpp/pr69985.c.  */
3317
3318 static void
3319 test_lexer_string_locations_long_line (const line_table_case &case_)
3320 {
3321   /* .....................000000.000111111111
3322      .....................123456.789012346789.  */
3323   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
3324                          "     \"0123456789012345678901234567890123456789"
3325                          "0123456789012345678901234567890123456789"
3326                          "0123456789012345678901234567890123456789"
3327                          "0123456789\"\n");
3328
3329   lexer_test test (case_, content, NULL);
3330
3331   /* Verify that we get the expected token back.  */
3332   const cpp_token *tok = test.get_token ();
3333   ASSERT_EQ (tok->type, CPP_STRING);
3334
3335   if (!should_have_column_data_p (line_table->highest_location))
3336     return;
3337
3338   /* Verify ranges of individual characters.  */
3339   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3340   for (int i = 0; i < 131; i++)
3341     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3342                           i, 2, 7 + i, 7 + i);
3343 }
3344
3345 /* Test of locations within a raw string that doesn't contain a newline.  */
3346
3347 static void
3348 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3349 {
3350   /* .....................00.0000000111111111122.
3351      .....................12.3456789012345678901.  */
3352   const char *content = ("R\"foo(0123456789)foo\"\n");
3353   lexer_test test (case_, content, NULL);
3354
3355   /* Verify that we get the expected token back.  */
3356   const cpp_token *tok = test.get_token ();
3357   ASSERT_EQ (tok->type, CPP_STRING);
3358
3359   /* Verify that cpp_interpret_string works.  */
3360   cpp_string dst_string;
3361   const enum cpp_ttype type = CPP_STRING;
3362   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3363                                       &dst_string, type);
3364   ASSERT_TRUE (result);
3365   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3366   free (const_cast <unsigned char *> (dst_string.text));
3367
3368   if (!should_have_column_data_p (line_table->highest_location))
3369     return;
3370
3371   /* 0-9, plus the nil terminator.  */
3372   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3373   for (int i = 0; i < 11; i++)
3374     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3375                           i, 1, 7 + i, 7 + i);
3376 }
3377
3378 /* Test of locations within a raw string that contains a newline.  */
3379
3380 static void
3381 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3382 {
3383   /* .....................00.0000.
3384      .....................12.3456.  */
3385   const char *content = ("R\"foo(\n"
3386   /* .....................00000.
3387      .....................12345.  */
3388                          "hello\n"
3389                          "world\n"
3390   /* .....................00000.
3391      .....................12345.  */
3392                          ")foo\"\n");
3393   lexer_test test (case_, content, NULL);
3394
3395   /* Verify that we get the expected token back.  */
3396   const cpp_token *tok = test.get_token ();
3397   ASSERT_EQ (tok->type, CPP_STRING);
3398
3399   /* Verify that cpp_interpret_string works.  */
3400   cpp_string dst_string;
3401   const enum cpp_ttype type = CPP_STRING;
3402   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3403                                       &dst_string, type);
3404   ASSERT_TRUE (result);
3405   ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3406   free (const_cast <unsigned char *> (dst_string.text));
3407
3408   if (!should_have_column_data_p (line_table->highest_location))
3409     return;
3410
3411   /* Currently we don't support locations within raw strings that
3412      contain newlines.  */
3413   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3414                                   "range endpoints are on different lines");
3415 }
3416
3417 /* Test of parsing an unterminated raw string.  */
3418
3419 static void
3420 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3421 {
3422   const char *content = "R\"ouch()ouCh\" /* etc */";
3423
3424   lexer_diagnostic_sink diagnostics;
3425   lexer_test test (case_, content, &diagnostics);
3426   test.m_implicitly_expect_EOF = false;
3427
3428   /* Attempt to parse the raw string.  */
3429   const cpp_token *tok = test.get_token ();
3430   ASSERT_EQ (tok->type, CPP_EOF);
3431
3432   ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3433   /* We expect the message "unterminated raw string"
3434      in the "cpplib" translation domain.
3435      It's not clear that dgettext is available on all supported hosts,
3436      so this assertion is commented-out for now.
3437        ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3438                      diagnostics.m_diagnostics[0]);
3439   */
3440 }
3441
3442 /* Test of lexing char constants.  */
3443
3444 static void
3445 test_lexer_char_constants (const line_table_case &case_)
3446 {
3447   /* Various char constants.
3448      .....................0000000001111111111.22222222223.
3449      .....................1234567890123456789.01234567890.  */
3450   const char *content = ("         'a'\n"
3451                          "        u'a'\n"
3452                          "        U'a'\n"
3453                          "        L'a'\n"
3454                          "         'abc'\n");
3455   lexer_test test (case_, content, NULL);
3456
3457   /* Verify that we get the expected tokens back.  */
3458   /* 'a'.  */
3459   const cpp_token *tok = test.get_token ();
3460   ASSERT_EQ (tok->type, CPP_CHAR);
3461   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3462
3463   unsigned int chars_seen;
3464   int unsignedp;
3465   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3466                                           &chars_seen, &unsignedp);
3467   ASSERT_EQ (cc, 'a');
3468   ASSERT_EQ (chars_seen, 1);
3469
3470   /* u'a'.  */
3471   tok = test.get_token ();
3472   ASSERT_EQ (tok->type, CPP_CHAR16);
3473   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3474
3475   /* U'a'.  */
3476   tok = test.get_token ();
3477   ASSERT_EQ (tok->type, CPP_CHAR32);
3478   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3479
3480   /* L'a'.  */
3481   tok = test.get_token ();
3482   ASSERT_EQ (tok->type, CPP_WCHAR);
3483   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3484
3485   /* 'abc' (c-char-sequence).  */
3486   tok = test.get_token ();
3487   ASSERT_EQ (tok->type, CPP_CHAR);
3488   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3489 }
3490 /* A table of interesting location_t values, giving one axis of our test
3491    matrix.  */
3492
3493 static const location_t boundary_locations[] = {
3494   /* Zero means "don't override the default values for a new line_table".  */
3495   0,
3496
3497   /* An arbitrary non-zero value that isn't close to one of
3498      the boundary values below.  */
3499   0x10000,
3500
3501   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
3502   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3503   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3504   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3505   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3506   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3507
3508   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
3509   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3510   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3511   LINE_MAP_MAX_LOCATION_WITH_COLS,
3512   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3513   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3514 };
3515
3516 /* Run TESTCASE multiple times, once for each case in our test matrix.  */
3517
3518 void
3519 for_each_line_table_case (void (*testcase) (const line_table_case &))
3520 {
3521   /* As noted above in the description of struct line_table_case,
3522      we want to explore a test matrix of interesting line_table
3523      situations, running various selftests for each case within the
3524      matrix.  */
3525
3526   /* Run all tests with:
3527      (a) line_table->default_range_bits == 0, and
3528      (b) line_table->default_range_bits == 5.  */
3529   int num_cases_tested = 0;
3530   for (int default_range_bits = 0; default_range_bits <= 5;
3531        default_range_bits += 5)
3532     {
3533       /* ...and use each of the "interesting" location values as
3534          the starting location within line_table.  */
3535       const int num_boundary_locations
3536         = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3537       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3538         {
3539           line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3540
3541           testcase (c);
3542
3543           num_cases_tested++;
3544         }
3545     }
3546
3547   /* Verify that we fully covered the test matrix.  */
3548   ASSERT_EQ (num_cases_tested, 2 * 12);
3549 }
3550
3551 /* Run all of the selftests within this file.  */
3552
3553 void
3554 input_c_tests ()
3555 {
3556   test_linenum_comparisons ();
3557   test_should_have_column_data_p ();
3558   test_unknown_location ();
3559   test_builtins ();
3560   for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3561
3562   for_each_line_table_case (test_accessing_ordinary_linemaps);
3563   for_each_line_table_case (test_lexer);
3564   for_each_line_table_case (test_lexer_string_locations_simple);
3565   for_each_line_table_case (test_lexer_string_locations_ebcdic);
3566   for_each_line_table_case (test_lexer_string_locations_hex);
3567   for_each_line_table_case (test_lexer_string_locations_oct);
3568   for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3569   for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3570   for_each_line_table_case (test_lexer_string_locations_ucn4);
3571   for_each_line_table_case (test_lexer_string_locations_ucn8);
3572   for_each_line_table_case (test_lexer_string_locations_wide_string);
3573   for_each_line_table_case (test_lexer_string_locations_string16);
3574   for_each_line_table_case (test_lexer_string_locations_string32);
3575   for_each_line_table_case (test_lexer_string_locations_u8);
3576   for_each_line_table_case (test_lexer_string_locations_utf8_source);
3577   for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3578   for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3579   for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3580   for_each_line_table_case (test_lexer_string_locations_macro);
3581   for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3582   for_each_line_table_case (test_lexer_string_locations_non_string);
3583   for_each_line_table_case (test_lexer_string_locations_long_line);
3584   for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3585   for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3586   for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3587   for_each_line_table_case (test_lexer_char_constants);
3588
3589   test_reading_source_line ();
3590 }
3591
3592 } // namespace selftest
3593
3594 #endif /* CHECKING_P */