gcc/input.cc

   1 /* Data and functions related to line maps and input files.
   2    Copyright (C) 2004-2023 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "intl.h"
  24 #include "diagnostic.h"
  25 #include "selftest.h"
  26 #include "cpplib.h"
  27
  28 #ifndef HAVE_ICONV
  29 #define HAVE_ICONV 0
  30 #endif
  31
  32 const char *
  33 special_fname_builtin ()
  34 {
  35   return _("<built-in>");
  36 }
  37
  38 /* Input charset configuration.  */
  39 static const char *default_charset_callback (const char *)
  40 {
  41   return nullptr;
  42 }
  43
  44 void
  45 file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
  46                                       bool should_skip_bom)
  47 {
  48   in_context.ccb = (ccb ? ccb : default_charset_callback);
  49   in_context.should_skip_bom = should_skip_bom;
  50 }
  51
  52 /* This is a cache used by get_next_line to store the content of a
  53    file to be searched for file lines.  */
  54 class file_cache_slot
  55 {
  56 public:
  57   file_cache_slot ();
  58   ~file_cache_slot ();
  59
  60   bool read_line_num (size_t line_num,
  61                       char ** line, ssize_t *line_len);
  62
  63   /* Accessors.  */
  64   const char *get_file_path () const { return m_file_path; }
  65   unsigned get_use_count () const { return m_use_count; }
  66   bool missing_trailing_newline_p () const
  67   {
  68     return m_missing_trailing_newline;
  69   }
  70   char_span get_full_file_content ();
  71
  72   void inc_use_count () { m_use_count++; }
  73
  74   bool create (const file_cache::input_context &in_context,
  75                const char *file_path, FILE *fp, unsigned highest_use_count);
  76   void evict ();
  77
  78  private:
  79   /* These are information used to store a line boundary.  */
  80   class line_info
  81   {
  82   public:
  83     /* The line number.  It starts from 1.  */
  84     size_t line_num;
  85
  86     /* The position (byte count) of the beginning of the line,
  87        relative to the file data pointer.  This starts at zero.  */
  88     size_t start_pos;
  89
  90     /* The position (byte count) of the last byte of the line.  This
  91        normally points to the '\n' character, or to one byte after the
  92        last byte of the file, if the file doesn't contain a '\n'
  93        character.  */
  94     size_t end_pos;
  95
  96     line_info (size_t l, size_t s, size_t e)
  97       : line_num (l), start_pos (s), end_pos (e)
  98     {}
  99
 100     line_info ()
 101       :line_num (0), start_pos (0), end_pos (0)
 102     {}
 103   };
 104
 105   bool needs_read_p () const;
 106   bool needs_grow_p () const;
 107   void maybe_grow ();
 108   bool read_data ();
 109   bool maybe_read_data ();
 110   bool get_next_line (char **line, ssize_t *line_len);
 111   bool read_next_line (char ** line, ssize_t *line_len);
 112   bool goto_next_line ();
 113
 114   static const size_t buffer_size = 4 * 1024;
 115   static const size_t line_record_size = 100;
 116
 117   /* The number of time this file has been accessed.  This is used
 118      to designate which file cache to evict from the cache
 119      array.  */
 120   unsigned m_use_count;
 121
 122   /* The file_path is the key for identifying a particular file in
 123      the cache.
 124      For libcpp-using code, the underlying buffer for this field is
 125      owned by the corresponding _cpp_file within the cpp_reader.  */
 126   const char *m_file_path;
 127
 128   FILE *m_fp;
 129
 130   /* This points to the content of the file that we've read so
 131      far.  */
 132   char *m_data;
 133
 134   /* The allocated buffer to be freed may start a little earlier than DATA,
 135      e.g. if a UTF8 BOM was skipped at the beginning.  */
 136   int m_alloc_offset;
 137
 138   /*  The size of the DATA array above.*/
 139   size_t m_size;
 140
 141   /* The number of bytes read from the underlying file so far.  This
 142      must be less (or equal) than SIZE above.  */
 143   size_t m_nb_read;
 144
 145   /* The index of the beginning of the current line.  */
 146   size_t m_line_start_idx;
 147
 148   /* The number of the previous line read.  This starts at 1.  Zero
 149      means we've read no line so far.  */
 150   size_t m_line_num;
 151
 152   /* This is the total number of lines of the current file.  At the
 153      moment, we try to get this information from the line map
 154      subsystem.  Note that this is just a hint.  When using the C++
 155      front-end, this hint is correct because the input file is then
 156      completely tokenized before parsing starts; so the line map knows
 157      the number of lines before compilation really starts.  For e.g,
 158      the C front-end, it can happen that we start emitting diagnostics
 159      before the line map has seen the end of the file.  */
 160   size_t m_total_lines;
 161
 162   /* Could this file be missing a trailing newline on its final line?
 163      Initially true (to cope with empty files), set to true/false
 164      as each line is read.  */
 165   bool m_missing_trailing_newline;
 166
 167   /* This is a record of the beginning and end of the lines we've seen
 168      while reading the file.  This is useful to avoid walking the data
 169      from the beginning when we are asked to read a line that is
 170      before LINE_START_IDX above.  Note that the maximum size of this
 171      record is line_record_size, so that the memory consumption
 172      doesn't explode.  We thus scale total_lines down to
 173      line_record_size.  */
 174   vec<line_info, va_heap> m_line_record;
 175
 176   void offset_buffer (int offset)
 177   {
 178     gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
 179                 : (size_t) offset <= m_size);
 180     gcc_assert (m_data);
 181     m_alloc_offset += offset;
 182     m_data += offset;
 183     m_size -= offset;
 184   }
 185
 186 };
 187
/* Current position in real source file.  Starts out as
   UNKNOWN_LOCATION.  */

location_t input_location = UNKNOWN_LOCATION;

/* The line table consulted in this file when resolving and expanding
   location_t values (see expand_location_1 and total_lines_num).  */

class line_maps *line_table;

/* A stashed copy of "line_table" for use by selftest::line_table_test.
   This needs to be a global so that it can be a GC root, and thus
   prevent the stashed copy from being garbage-collected if the GC runs
   during a line_table_test.  */

class line_maps *saved_line_table;
 200
 201 /* Expand the source location LOC into a human readable location.  If
 202    LOC resolves to a builtin location, the file name of the readable
 203    location is set to the string "<built-in>". If EXPANSION_POINT_P is
 204    TRUE and LOC is virtual, then it is resolved to the expansion
 205    point of the involved macro.  Otherwise, it is resolved to the
 206    spelling location of the token.
 207
 208    When resolving to the spelling location of the token, if the
 209    resulting location is for a built-in location (that is, it has no
 210    associated line/column) in the context of a macro expansion, the
 211    returned location is the first one (while unwinding the macro
 212    location towards its expansion point) that is in real source
 213    code.
 214
 215    ASPECT controls which part of the location to use.  */
 216
 217 static expanded_location
 218 expand_location_1 (location_t loc,
 219                    bool expansion_point_p,
 220                    enum location_aspect aspect)
 221 {
 222   expanded_location xloc;
 223   const line_map_ordinary *map;
 224   enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
 225   tree block = NULL;
 226
 227   if (IS_ADHOC_LOC (loc))
 228     {
 229       block = LOCATION_BLOCK (loc);
 230       loc = LOCATION_LOCUS (loc);
 231     }
 232
 233   memset (&xloc, 0, sizeof (xloc));
 234
 235   if (loc >= RESERVED_LOCATION_COUNT)
 236     {
 237       if (!expansion_point_p)
 238         {
 239           /* We want to resolve LOC to its spelling location.
 240
 241              But if that spelling location is a reserved location that
 242              appears in the context of a macro expansion (like for a
 243              location for a built-in token), let's consider the first
 244              location (toward the expansion point) that is not reserved;
 245              that is, the first location that is in real source code.  */
 246           loc = linemap_unwind_to_first_non_reserved_loc (line_table,
 247                                                           loc, NULL);
 248           lrk = LRK_SPELLING_LOCATION;
 249         }
 250       loc = linemap_resolve_location (line_table, loc, lrk, &map);
 251
 252       /* loc is now either in an ordinary map, or is a reserved location.
 253          If it is a compound location, the caret is in a spelling location,
 254          but the start/finish might still be a virtual location.
 255          Depending of what the caller asked for, we may need to recurse
 256          one level in order to resolve any virtual locations in the
 257          end-points.  */
 258       switch (aspect)
 259         {
 260         default:
 261           gcc_unreachable ();
 262           /* Fall through.  */
 263         case LOCATION_ASPECT_CARET:
 264           break;
 265         case LOCATION_ASPECT_START:
 266           {
 267             location_t start = get_start (loc);
 268             if (start != loc)
 269               return expand_location_1 (start, expansion_point_p, aspect);
 270           }
 271           break;
 272         case LOCATION_ASPECT_FINISH:
 273           {
 274             location_t finish = get_finish (loc);
 275             if (finish != loc)
 276               return expand_location_1 (finish, expansion_point_p, aspect);
 277           }
 278           break;
 279         }
 280       xloc = linemap_expand_location (line_table, map, loc);
 281     }
 282
 283   xloc.data = block;
 284   if (loc <= BUILTINS_LOCATION)
 285     xloc.file = loc == UNKNOWN_LOCATION ? NULL : special_fname_builtin ();
 286
 287   return xloc;
 288 }
 289
 290 /* Initialize the set of cache used for files accessed by caret
 291    diagnostic.  */
 292
 293 static void
 294 diagnostic_file_cache_init (void)
 295 {
 296   gcc_assert (global_dc);
 297   global_dc->file_cache_init ();
 298 }
 299
 300 void
 301 diagnostic_context::file_cache_init ()
 302 {
 303   if (m_file_cache == nullptr)
 304     m_file_cache = new file_cache ();
 305 }
 306
 307 /* Return the total lines number that have been read so far by the
 308    line map (in the preprocessor) so far.  For languages like C++ that
 309    entirely preprocess the input file before starting to parse, this
 310    equals the actual number of lines of the file.  */
 311
 312 static size_t
 313 total_lines_num (const char *file_path)
 314 {
 315   size_t r = 0;
 316   location_t l = 0;
 317   if (linemap_get_file_highest_location (line_table, file_path, &l))
 318     {
 319       gcc_assert (l >= RESERVED_LOCATION_COUNT);
 320       expanded_location xloc = expand_location (l);
 321       r = xloc.line;
 322     }
 323   return r;
 324 }
 325
 326 /* Lookup the cache used for the content of a given file accessed by
 327    caret diagnostic.  Return the found cached file, or NULL if no
 328    cached file was found.  */
 329
 330 file_cache_slot *
 331 file_cache::lookup_file (const char *file_path)
 332 {
 333   gcc_assert (file_path);
 334
 335   /* This will contain the found cached file.  */
 336   file_cache_slot *r = NULL;
 337   for (unsigned i = 0; i < num_file_slots; ++i)
 338     {
 339       file_cache_slot *c = &m_file_slots[i];
 340       if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
 341         {
 342           c->inc_use_count ();
 343           r = c;
 344         }
 345     }
 346
 347   if (r)
 348     r->inc_use_count ();
 349
 350   return r;
 351 }
 352
 353 /* Purge any mention of FILENAME from the cache of files used for
 354    printing source code.  For use in selftests when working
 355    with tempfiles.  */
 356
 357 void
 358 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
 359 {
 360   gcc_assert (file_path);
 361
 362   auto file_cache = global_dc->get_file_cache ();
 363   if (!file_cache)
 364     return;
 365   file_cache->forcibly_evict_file (file_path);
 366 }
 367
 368 void
 369 file_cache::forcibly_evict_file (const char *file_path)
 370 {
 371   gcc_assert (file_path);
 372
 373   file_cache_slot *r = lookup_file (file_path);
 374   if (!r)
 375     /* Not found.  */
 376     return;
 377
 378   r->evict ();
 379 }
 380
 381 void
 382 file_cache_slot::evict ()
 383 {
 384   m_file_path = NULL;
 385   if (m_fp)
 386     fclose (m_fp);
 387   m_fp = NULL;
 388   m_nb_read = 0;
 389   m_line_start_idx = 0;
 390   m_line_num = 0;
 391   m_line_record.truncate (0);
 392   m_use_count = 0;
 393   m_total_lines = 0;
 394   m_missing_trailing_newline = true;
 395 }
 396
 397 /* Return the file cache that has been less used, recently, or the
 398    first empty one.  If HIGHEST_USE_COUNT is non-null,
 399    *HIGHEST_USE_COUNT is set to the highest use count of the entries
 400    in the cache table.  */
 401
 402 file_cache_slot*
 403 file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
 404 {
 405   diagnostic_file_cache_init ();
 406
 407   file_cache_slot *to_evict = &m_file_slots[0];
 408   unsigned huc = to_evict->get_use_count ();
 409   for (unsigned i = 1; i < num_file_slots; ++i)
 410     {
 411       file_cache_slot *c = &m_file_slots[i];
 412       bool c_is_empty = (c->get_file_path () == NULL);
 413
 414       if (c->get_use_count () < to_evict->get_use_count ()
 415           || (to_evict->get_file_path () && c_is_empty))
 416         /* We evict C because it's either an entry with a lower use
 417            count or one that is empty.  */
 418         to_evict = c;
 419
 420       if (huc < c->get_use_count ())
 421         huc = c->get_use_count ();
 422
 423       if (c_is_empty)
 424         /* We've reached the end of the cache; subsequent elements are
 425            all empty.  */
 426         break;
 427     }
 428
 429   if (highest_use_count)
 430     *highest_use_count = huc;
 431
 432   return to_evict;
 433 }
 434
 435 /* Create the cache used for the content of a given file to be
 436    accessed by caret diagnostic.  This cache is added to an array of
 437    cache and can be retrieved by lookup_file_in_cache_tab.  This
 438    function returns the created cache.  Note that only the last
 439    num_file_slots files are cached.
 440
 441    This can return nullptr if the FILE_PATH can't be opened for
 442    reading, or if the content can't be converted to the input_charset.  */
 443
 444 file_cache_slot*
 445 file_cache::add_file (const char *file_path)
 446 {
 447
 448   FILE *fp = fopen (file_path, "r");
 449   if (fp == NULL)
 450     return NULL;
 451
 452   unsigned highest_use_count = 0;
 453   file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
 454   if (!r->create (in_context, file_path, fp, highest_use_count))
 455     return NULL;
 456   return r;
 457 }
 458
 459 /* Get a borrowed char_span to the full content of this file
 460    as decoded according to the input charset, encoded as UTF-8.  */
 461
 462 char_span
 463 file_cache_slot::get_full_file_content ()
 464 {
 465   char *line;
 466   ssize_t line_len;
 467   while (get_next_line (&line, &line_len))
 468     {
 469     }
 470   return char_span (m_data, m_nb_read);
 471 }
 472
 473 /* Populate this slot for use on FILE_PATH and FP, dropping any
 474    existing cached content within it.  */
 475
 476 bool
 477 file_cache_slot::create (const file_cache::input_context &in_context,
 478                          const char *file_path, FILE *fp,
 479                          unsigned highest_use_count)
 480 {
 481   m_file_path = file_path;
 482   if (m_fp)
 483     fclose (m_fp);
 484   m_fp = fp;
 485   if (m_alloc_offset)
 486     offset_buffer (-m_alloc_offset);
 487   m_nb_read = 0;
 488   m_line_start_idx = 0;
 489   m_line_num = 0;
 490   m_line_record.truncate (0);
 491   /* Ensure that this cache entry doesn't get evicted next time
 492      add_file_to_cache_tab is called.  */
 493   m_use_count = ++highest_use_count;
 494   m_total_lines = total_lines_num (file_path);
 495   m_missing_trailing_newline = true;
 496
 497
 498   /* Check the input configuration to determine if we need to do any
 499      transformations, such as charset conversion or BOM skipping.  */
 500   if (const char *input_charset = in_context.ccb (file_path))
 501     {
 502       /* Need a full-blown conversion of the input charset.  */
 503       fclose (m_fp);
 504       m_fp = NULL;
 505       const cpp_converted_source cs
 506         = cpp_get_converted_source (file_path, input_charset);
 507       if (!cs.data)
 508         return false;
 509       if (m_data)
 510         XDELETEVEC (m_data);
 511       m_data = cs.data;
 512       m_nb_read = m_size = cs.len;
 513       m_alloc_offset = cs.data - cs.to_free;
 514     }
 515   else if (in_context.should_skip_bom)
 516     {
 517       if (read_data ())
 518         {
 519           const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
 520           offset_buffer (offset);
 521           m_nb_read -= offset;
 522         }
 523     }
 524
 525   return true;
 526 }
 527
 528 /* file_cache's ctor.  */
 529
 530 file_cache::file_cache ()
 531 : m_file_slots (new file_cache_slot[num_file_slots])
 532 {
 533   initialize_input_context (nullptr, false);
 534 }
 535
 536 /* file_cache's dtor.  */
 537
 538 file_cache::~file_cache ()
 539 {
 540   delete[] m_file_slots;
 541 }
 542
 543 /* Lookup the cache used for the content of a given file accessed by
 544    caret diagnostic.  If no cached file was found, create a new cache
 545    for this file, add it to the array of cached file and return
 546    it.
 547
 548    This can return nullptr on a cache miss if FILE_PATH can't be opened for
 549    reading, or if the content can't be converted to the input_charset.  */
 550
 551 file_cache_slot*
 552 file_cache::lookup_or_add_file (const char *file_path)
 553 {
 554   file_cache_slot *r = lookup_file (file_path);
 555   if (r == NULL)
 556     r = add_file (file_path);
 557   return r;
 558 }
 559
 560 /* Default constructor for a cache of file used by caret
 561    diagnostic.  */
 562
 563 file_cache_slot::file_cache_slot ()
 564 : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
 565   m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
 566   m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true)
 567 {
 568   m_line_record.create (0);
 569 }
 570
 571 /* Destructor for a cache of file used by caret diagnostic.  */
 572
 573 file_cache_slot::~file_cache_slot ()
 574 {
 575   if (m_fp)
 576     {
 577       fclose (m_fp);
 578       m_fp = NULL;
 579     }
 580   if (m_data)
 581     {
 582       offset_buffer (-m_alloc_offset);
 583       XDELETEVEC (m_data);
 584       m_data = 0;
 585     }
 586   m_line_record.release ();
 587 }
 588
 589 /* Returns TRUE iff the cache would need to be filled with data coming
 590    from the file.  That is, either the cache is empty or full or the
 591    current line is empty.  Note that if the cache is full, it would
 592    need to be extended and filled again.  */
 593
 594 bool
 595 file_cache_slot::needs_read_p () const
 596 {
 597   return m_fp && (m_nb_read == 0
 598           || m_nb_read == m_size
 599           || (m_line_start_idx >= m_nb_read - 1));
 600 }
 601
 602 /*  Return TRUE iff the cache is full and thus needs to be
 603     extended.  */
 604
 605 bool
 606 file_cache_slot::needs_grow_p () const
 607 {
 608   return m_nb_read == m_size;
 609 }
 610
 611 /* Grow the cache if it needs to be extended.  */
 612
 613 void
 614 file_cache_slot::maybe_grow ()
 615 {
 616   if (!needs_grow_p ())
 617     return;
 618
 619   if (!m_data)
 620     {
 621       gcc_assert (m_size == 0 && m_alloc_offset == 0);
 622       m_size = buffer_size;
 623       m_data = XNEWVEC (char, m_size);
 624     }
 625   else
 626     {
 627       const int offset = m_alloc_offset;
 628       offset_buffer (-offset);
 629       m_size *= 2;
 630       m_data = XRESIZEVEC (char, m_data, m_size);
 631       offset_buffer (offset);
 632     }
 633 }
 634
 635 /*  Read more data into the cache.  Extends the cache if need be.
 636     Returns TRUE iff new data could be read.  */
 637
 638 bool
 639 file_cache_slot::read_data ()
 640 {
 641   if (feof (m_fp) || ferror (m_fp))
 642     return false;
 643
 644   maybe_grow ();
 645
 646   char * from = m_data + m_nb_read;
 647   size_t to_read = m_size - m_nb_read;
 648   size_t nb_read = fread (from, 1, to_read, m_fp);
 649
 650   if (ferror (m_fp))
 651     return false;
 652
 653   m_nb_read += nb_read;
 654   return !!nb_read;
 655 }
 656
 657 /* Read new data iff the cache needs to be filled with more data
 658    coming from the file FP.  Return TRUE iff the cache was filled with
 659    mode data.  */
 660
 661 bool
 662 file_cache_slot::maybe_read_data ()
 663 {
 664   if (!needs_read_p ())
 665     return false;
 666   return read_data ();
 667 }
 668
 669 /* Helper function for file_cache_slot::get_next_line (), to find the end of
 670    the next line.  Returns with the memchr convention, i.e. nullptr if a line
 671    terminator was not found.  We need to determine line endings in the same
 672    manner that libcpp does: any of \n, \r\n, or \r is a line ending.  */
 673
 674 static char *
 675 find_end_of_line (char *s, size_t len)
 676 {
 677   for (const auto end = s + len; s != end; ++s)
 678     {
 679       if (*s == '\n')
 680         return s;
 681       if (*s == '\r')
 682         {
 683           const auto next = s + 1;
 684           if (next == end)
 685             {
 686               /* Don't find the line ending if \r is the very last character
 687                  in the buffer; we do not know if it's the end of the file or
 688                  just the end of what has been read so far, and we wouldn't
 689                  want to break in the middle of what's actually a \r\n
 690                  sequence.  Instead, we will handle the case of a file ending
 691                  in a \r later.  */
 692               break;
 693             }
 694           return (*next == '\n' ? next : s);
 695         }
 696     }
 697   return nullptr;
 698 }
 699
 700 /* Read a new line from file FP, using C as a cache for the data
 701    coming from the file.  Upon successful completion, *LINE is set to
 702    the beginning of the line found.  *LINE points directly in the
 703    line cache and is only valid until the next call of get_next_line.
 704    *LINE_LEN is set to the length of the line.  Note that the line
 705    does not contain any terminal delimiter.  This function returns
 706    true if some data was read or process from the cache, false
 707    otherwise.  Note that subsequent calls to get_next_line might
 708    make the content of *LINE invalid.  */
 709
 710 bool
 711 file_cache_slot::get_next_line (char **line, ssize_t *line_len)
 712 {
 713   /* Fill the cache with data to process.  */
 714   maybe_read_data ();
 715
 716   size_t remaining_size = m_nb_read - m_line_start_idx;
 717   if (remaining_size == 0)
 718     /* There is no more data to process.  */
 719     return false;
 720
 721   char *line_start = m_data + m_line_start_idx;
 722
 723   char *next_line_start = NULL;
 724   size_t len = 0;
 725   char *line_end = find_end_of_line (line_start, remaining_size);
 726   if (line_end == NULL)
 727     {
 728       /* We haven't found an end-of-line delimiter in the cache.
 729          Fill the cache with more data from the file and look again.  */
 730       while (maybe_read_data ())
 731         {
 732           line_start = m_data + m_line_start_idx;
 733           remaining_size = m_nb_read - m_line_start_idx;
 734           line_end = find_end_of_line (line_start, remaining_size);
 735           if (line_end != NULL)
 736             {
 737               next_line_start = line_end + 1;
 738               break;
 739             }
 740         }
 741       if (line_end == NULL)
 742         {
 743           /* We've loaded all the file into the cache and still no
 744              terminator.  Let's say the line ends up at one byte past the
 745              end of the file.  This is to stay consistent with the case
 746              of when the line ends up with a terminator and line_end points to
 747              that.  That consistency is useful below in the len calculation.
 748
 749              If the file ends in a \r, we didn't identify it as a line
 750              terminator above, so do that now instead.  */
 751           line_end = m_data + m_nb_read;
 752           if (m_nb_read && line_end[-1] == '\r')
 753             {
 754               --line_end;
 755               m_missing_trailing_newline = false;
 756             }
 757           else
 758             m_missing_trailing_newline = true;
 759         }
 760       else
 761         m_missing_trailing_newline = false;
 762     }
 763   else
 764     {
 765       next_line_start = line_end + 1;
 766       m_missing_trailing_newline = false;
 767     }
 768
 769   if (m_fp && ferror (m_fp))
 770     return false;
 771
 772   /* At this point, we've found the end of the of line.  It either points to
 773      the line terminator or to one byte after the last byte of the file.  */
 774   gcc_assert (line_end != NULL);
 775
 776   len = line_end - line_start;
 777
 778   if (m_line_start_idx < m_nb_read)
 779     *line = line_start;
 780
 781   ++m_line_num;
 782
 783   /* Before we update our line record, make sure the hint about the
 784      total number of lines of the file is correct.  If it's not, then
 785      we give up recording line boundaries from now on.  */
 786   bool update_line_record = true;
 787   if (m_line_num > m_total_lines)
 788     update_line_record = false;
 789
 790     /* Now update our line record so that re-reading lines from the
 791      before m_line_start_idx is faster.  */
 792   if (update_line_record
 793       && m_line_record.length () < line_record_size)
 794     {
 795       /* If the file lines fits in the line record, we just record all
 796          its lines ...*/
 797       if (m_total_lines <= line_record_size
 798           && m_line_num > m_line_record.length ())
 799         m_line_record.safe_push
 800           (file_cache_slot::line_info (m_line_num,
 801                                        m_line_start_idx,
 802                                        line_end - m_data));
 803       else if (m_total_lines > line_record_size)
 804         {
 805           /* ... otherwise, we just scale total_lines down to
 806              (line_record_size lines.  */
 807           size_t n = (m_line_num * line_record_size) / m_total_lines;
 808           if (m_line_record.length () == 0
 809               || n >= m_line_record.length ())
 810             m_line_record.safe_push
 811               (file_cache_slot::line_info (m_line_num,
 812                                            m_line_start_idx,
 813                                            line_end - m_data));
 814         }
 815     }
 816
 817   /* Update m_line_start_idx so that it points to the next line to be
 818      read.  */
 819   if (next_line_start)
 820     m_line_start_idx = next_line_start - m_data;
 821   else
 822     /* We didn't find any terminal '\n'.  Let's consider that the end
 823        of line is the end of the data in the cache.  The next
 824        invocation of get_next_line will either read more data from the
 825        underlying file or return false early because we've reached the
 826        end of the file.  */
 827     m_line_start_idx = m_nb_read;
 828
 829   *line_len = len;
 830
 831   return true;
 832 }
 833
 834 /* Consume the next bytes coming from the cache (or from its
 835    underlying file if there are remaining unread bytes in the file)
 836    until we reach the next end-of-line (or end-of-file).  There is no
 837    copying from the cache involved.  Return TRUE upon successful
 838    completion.  */
 839
 840 bool
 841 file_cache_slot::goto_next_line ()
 842 {
 843   char *l;
 844   ssize_t len;
 845
 846   return get_next_line (&l, &len);
 847 }
 848
 849 /* Read an arbitrary line number LINE_NUM from the file cached in C.
 850    If the line was read successfully, *LINE points to the beginning
 851    of the line in the file cache and *LINE_LEN is the length of the
 852    line.  *LINE is not nul-terminated, but may contain zero bytes.
 853    *LINE is only valid until the next call of read_line_num.
 854    This function returns bool if a line was read.  */
 855
 856 bool
 857 file_cache_slot::read_line_num (size_t line_num,
 858                        char ** line, ssize_t *line_len)
 859 {
 860   gcc_assert (line_num > 0);
 861
 862   if (line_num <= m_line_num)
 863     {
 864       /* We've been asked to read lines that are before m_line_num.
 865          So lets use our line record (if it's not empty) to try to
 866          avoid re-reading the file from the beginning again.  */
 867
 868       if (m_line_record.is_empty ())
 869         {
 870           m_line_start_idx = 0;
 871           m_line_num = 0;
 872         }
 873       else
 874         {
 875           file_cache_slot::line_info *i = NULL;
 876           if (m_total_lines <= line_record_size)
 877             {
 878               /* In languages where the input file is not totally
 879                  preprocessed up front, the m_total_lines hint
 880                  can be smaller than the number of lines of the
 881                  file.  In that case, only the first
 882                  m_total_lines have been recorded.
 883
 884                  Otherwise, the first m_total_lines we've read have
 885                  their start/end recorded here.  */
 886               i = (line_num <= m_total_lines)
 887                 ? &m_line_record[line_num - 1]
 888                 : &m_line_record[m_total_lines - 1];
 889               gcc_assert (i->line_num <= line_num);
 890             }
 891           else
 892             {
 893               /*  So the file had more lines than our line record
 894                   size.  Thus the number of lines we've recorded has
 895                   been scaled down to line_record_size.  Let's
 896                   pick the start/end of the recorded line that is
 897                   closest to line_num.  */
 898               size_t n = (line_num <= m_total_lines)
 899                 ? line_num * line_record_size / m_total_lines
 900                 : m_line_record.length () - 1;
 901               if (n < m_line_record.length ())
 902                 {
 903                   i = &m_line_record[n];
 904                   gcc_assert (i->line_num <= line_num);
 905                 }
 906             }
 907
 908           if (i && i->line_num == line_num)
 909             {
 910               /* We have the start/end of the line.  */
 911               *line = m_data + i->start_pos;
 912               *line_len = i->end_pos - i->start_pos;
 913               return true;
 914             }
 915
 916           if (i)
 917             {
 918               m_line_start_idx = i->start_pos;
 919               m_line_num = i->line_num - 1;
 920             }
 921           else
 922             {
 923               m_line_start_idx = 0;
 924               m_line_num = 0;
 925             }
 926         }
 927     }
 928
 929   /*  Let's walk from line m_line_num up to line_num - 1, without
 930       copying any line.  */
 931   while (m_line_num < line_num - 1)
 932     if (!goto_next_line ())
 933       return false;
 934
 935   /* The line we want is the next one.  Let's read and copy it back to
 936      the caller.  */
 937   return get_next_line (line, line_len);
 938 }
 939
 940 /* Return the physical source line that corresponds to FILE_PATH/LINE.
 941    The line is not nul-terminated.  The returned pointer is only
 942    valid until the next call of location_get_source_line.
 943    Note that the line can contain several null characters,
 944    so the returned value's length has the actual length of the line.
 945    If the function fails, a NULL char_span is returned.  */
 946
 947 char_span
 948 file_cache::get_source_line (const char *file_path, int line)
 949 {
 950   char *buffer = NULL;
 951   ssize_t len;
 952
 953   if (line == 0)
 954     return char_span (NULL, 0);
 955
 956   if (file_path == NULL)
 957     return char_span (NULL, 0);
 958
 959   file_cache_slot *c = lookup_or_add_file (file_path);
 960   if (c == NULL)
 961     return char_span (NULL, 0);
 962
 963   bool read = c->read_line_num (line, &buffer, &len);
 964   if (!read)
 965     return char_span (NULL, 0);
 966
 967   return char_span (buffer, len);
 968 }
 969
 970 char_span
 971 location_get_source_line (const char *file_path, int line)
 972 {
 973   diagnostic_file_cache_init ();
 974   return global_dc->get_file_cache ()->get_source_line (file_path, line);
 975 }
 976
 977 /* Return a NUL-terminated copy of the source text between two locations, or
 978    NULL if the arguments are invalid.  The caller is responsible for freeing
 979    the return value.  */
 980
 981 char *
 982 get_source_text_between (location_t start, location_t end)
 983 {
 984   expanded_location expstart =
 985     expand_location_to_spelling_point (start, LOCATION_ASPECT_START);
 986   expanded_location expend =
 987     expand_location_to_spelling_point (end, LOCATION_ASPECT_FINISH);
 988
 989   /* If the locations are in different files or the end comes before the
 990      start, give up and return nothing.  */
 991   if (!expstart.file || !expend.file)
 992     return NULL;
 993   if (strcmp (expstart.file, expend.file) != 0)
 994     return NULL;
 995   if (expstart.line > expend.line)
 996     return NULL;
 997   if (expstart.line == expend.line
 998       && expstart.column > expend.column)
 999     return NULL;
1000   /* These aren't real column numbers, give up.  */
1001   if (expstart.column == 0 || expend.column == 0)
1002     return NULL;
1003
1004   /* For a single line we need to trim both edges.  */
1005   if (expstart.line == expend.line)
1006     {
1007       char_span line = location_get_source_line (expstart.file, expstart.line);
1008       if (line.length () < 1)
1009         return NULL;
1010       int s = expstart.column - 1;
1011       int len = expend.column - s;
1012       if (line.length () < (size_t)expend.column)
1013         return NULL;
1014       return line.subspan (s, len).xstrdup ();
1015     }
1016
1017   struct obstack buf_obstack;
1018   obstack_init (&buf_obstack);
1019
1020   /* Loop through all lines in the range and append each to buf; may trim
1021      parts of the start and end lines off depending on column values.  */
1022   for (int lnum = expstart.line; lnum <= expend.line; ++lnum)
1023     {
1024       char_span line = location_get_source_line (expstart.file, lnum);
1025       if (line.length () < 1 && (lnum != expstart.line && lnum != expend.line))
1026         continue;
1027
1028       /* For the first line in the range, only start at expstart.column */
1029       if (lnum == expstart.line)
1030         {
1031           unsigned off = expstart.column - 1;
1032           if (line.length () < off)
1033             return NULL;
1034           line = line.subspan (off, line.length() - off);
1035         }
1036       /* For the last line, don't go past expend.column */
1037       else if (lnum == expend.line)
1038         {
1039           if (line.length () < (size_t)expend.column)
1040             return NULL;
1041           line = line.subspan (0, expend.column);
1042         }
1043
1044       /* Combine spaces at the beginning of later lines.  */
1045       if (lnum > expstart.line)
1046         {
1047           unsigned off;
1048           for (off = 0; off < line.length(); ++off)
1049             if (line[off] != ' ' && line[off] != '\t')
1050               break;
1051           if (off > 0)
1052             {
1053               obstack_1grow (&buf_obstack, ' ');
1054               line = line.subspan (off, line.length() - off);
1055             }
1056         }
1057
1058       /* This does not include any trailing newlines.  */
1059       obstack_grow (&buf_obstack, line.get_buffer (), line.length ());
1060     }
1061
1062   /* NUL-terminate and finish the buf obstack.  */
1063   obstack_1grow (&buf_obstack, 0);
1064   const char *buf = (const char *) obstack_finish (&buf_obstack);
1065
1066   return xstrdup (buf);
1067 }
1068
1069
1070 char_span
1071 file_cache::get_source_file_content (const char *file_path)
1072 {
1073   file_cache_slot *c = lookup_or_add_file (file_path);
1074   if (c == nullptr)
1075     return char_span (nullptr, 0);
1076   return c->get_full_file_content ();
1077 }
1078
1079
1080 /* Get a borrowed char_span to the full content of FILE_PATH
1081    as decoded according to the input charset, encoded as UTF-8.  */
1082
1083 char_span
1084 get_source_file_content (const char *file_path)
1085 {
1086   diagnostic_file_cache_init ();
1087   return global_dc->get_file_cache ()->get_source_file_content (file_path);
1088 }
1089
1090 /* Determine if FILE_PATH missing a trailing newline on its final line.
1091    Only valid to call once all of the file has been loaded, by
1092    requesting a line number beyond the end of the file.  */
1093
1094 bool
1095 location_missing_trailing_newline (const char *file_path)
1096 {
1097   diagnostic_file_cache_init ();
1098
1099   file_cache_slot *c = global_dc->get_file_cache ()->lookup_or_add_file (file_path);
1100   if (c == NULL)
1101     return false;
1102
1103   return c->missing_trailing_newline_p ();
1104 }
1105
1106 /* Test if the location originates from the spelling location of a
1107    builtin-tokens.  That is, return TRUE if LOC is a (possibly
1108    virtual) location of a built-in token that appears in the expansion
1109    list of a macro.  Please note that this function also works on
1110    tokens that result from built-in tokens.  For instance, the
1111    function would return true if passed a token "4" that is the result
1112    of the expansion of the built-in __LINE__ macro.  */
1113 bool
1114 is_location_from_builtin_token (location_t loc)
1115 {
1116   const line_map_ordinary *map = NULL;
1117   loc = linemap_resolve_location (line_table, loc,
1118                                   LRK_SPELLING_LOCATION, &map);
1119   return loc == BUILTINS_LOCATION;
1120 }
1121
1122 /* Expand the source location LOC into a human readable location.  If
1123    LOC is virtual, it resolves to the expansion point of the involved
1124    macro.  If LOC resolves to a builtin location, the file name of the
1125    readable location is set to the string "<built-in>".  */
1126
1127 expanded_location
1128 expand_location (location_t loc)
1129 {
1130   return expand_location_1 (loc, /*expansion_point_p=*/true,
1131                             LOCATION_ASPECT_CARET);
1132 }
1133
1134 /* Expand the source location LOC into a human readable location.  If
1135    LOC is virtual, it resolves to the expansion location of the
1136    relevant macro.  If LOC resolves to a builtin location, the file
1137    name of the readable location is set to the string
1138    "<built-in>".  */
1139
1140 expanded_location
1141 expand_location_to_spelling_point (location_t loc,
1142                                    enum location_aspect aspect)
1143 {
1144   return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
1145 }
1146
1147 /* The rich_location class within libcpp requires a way to expand
1148    location_t instances, and relies on the client code
1149    providing a symbol named
1150      linemap_client_expand_location_to_spelling_point
1151    to do this.
1152
1153    This is the implementation for libcommon.a (all host binaries),
1154    which simply calls into expand_location_1.  */
1155
1156 expanded_location
1157 linemap_client_expand_location_to_spelling_point (location_t loc,
1158                                                   enum location_aspect aspect)
1159 {
1160   return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
1161 }
1162
1163
1164 /* If LOCATION is in a system header and if it is a virtual location
1165    for a token coming from the expansion of a macro, unwind it to
1166    the location of the expansion point of the macro.  If the expansion
1167    point is also in a system header return the original LOCATION.
1168    Otherwise, return the location of the expansion point.
1169
1170    This is used for instance when we want to emit diagnostics about a
1171    token that may be located in a macro that is itself defined in a
1172    system header, for example, for the NULL macro.  In such a case, if
1173    LOCATION were passed directly to diagnostic functions such as
1174    warning_at, the diagnostic would be suppressed (unless
1175    -Wsystem-headers).  */
1176
1177 location_t
1178 expansion_point_location_if_in_system_header (location_t location)
1179 {
1180   if (!in_system_header_at (location))
1181     return location;
1182
1183   location_t xloc = linemap_resolve_location (line_table, location,
1184                                               LRK_MACRO_EXPANSION_POINT,
1185                                               NULL);
1186   return in_system_header_at (xloc) ? location : xloc;
1187 }
1188
1189 /* If LOCATION is a virtual location for a token coming from the expansion
1190    of a macro, unwind to the location of the expansion point of the macro.  */
1191
1192 location_t
1193 expansion_point_location (location_t location)
1194 {
1195   return linemap_resolve_location (line_table, location,
1196                                    LRK_MACRO_EXPANSION_POINT, NULL);
1197 }
1198
1199 /* Construct a location with caret at CARET, ranging from START to
1200    FINISH.
1201
1202    For example, consider:
1203
1204                  11111111112
1205         12345678901234567890
1206      522
1207      523   return foo + bar;
1208                   ~~~~^~~~~
1209      524
1210
1211    The location's caret is at the "+", line 523 column 15, but starts
1212    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
1213    of "bar" at column 19.  */
1214
1215 location_t
1216 make_location (location_t caret, location_t start, location_t finish)
1217 {
1218   return line_table->make_location (caret, start, finish);
1219 }
1220
1221 /* Same as above, but taking a source range rather than two locations.  */
1222
1223 location_t
1224 make_location (location_t caret, source_range src_range)
1225 {
1226   location_t pure_loc = get_pure_location (caret);
1227   return line_table->get_or_create_combined_loc (pure_loc, src_range,
1228                                                  nullptr, 0);
1229 }
1230
1231 /* An expanded_location stores the column in byte units.  This function
1232    converts that column to display units.  That requires reading the associated
1233    source line in order to calculate the display width.  If that cannot be done
1234    for any reason, then returns the byte column as a fallback.  */
1235 int
1236 location_compute_display_column (expanded_location exploc,
1237                                  const cpp_char_column_policy &policy)
1238 {
1239   if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
1240     return exploc.column;
1241   char_span line = location_get_source_line (exploc.file, exploc.line);
1242   /* If line is NULL, this function returns exploc.column which is the
1243      desired fallback.  */
1244   return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
1245                                             exploc.column, policy);
1246 }
1247
1248 /* Dump statistics to stderr about the memory usage of the line_table
1249    set of line maps.  This also displays some statistics about macro
1250    expansion.  */
1251
1252 void
1253 dump_line_table_statistics (void)
1254 {
1255   struct linemap_stats s;
1256   long total_used_map_size,
1257     macro_maps_size,
1258     total_allocated_map_size;
1259
1260   memset (&s, 0, sizeof (s));
1261
1262   linemap_get_statistics (line_table, &s);
1263
1264   macro_maps_size = s.macro_maps_used_size
1265     + s.macro_maps_locations_size;
1266
1267   total_allocated_map_size = s.ordinary_maps_allocated_size
1268     + s.macro_maps_allocated_size
1269     + s.macro_maps_locations_size;
1270
1271   total_used_map_size = s.ordinary_maps_used_size
1272     + s.macro_maps_used_size
1273     + s.macro_maps_locations_size;
1274
1275   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
1276            s.num_expanded_macros);
1277   if (s.num_expanded_macros != 0)
1278     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
1279              s.num_macro_tokens / s.num_expanded_macros);
1280   fprintf (stderr,
1281            "\nLine Table allocations during the "
1282            "compilation process\n");
1283   fprintf (stderr, "Number of ordinary maps used:        " PRsa (5) "\n",
1284            SIZE_AMOUNT (s.num_ordinary_maps_used));
1285   fprintf (stderr, "Ordinary map used size:              " PRsa (5) "\n",
1286            SIZE_AMOUNT (s.ordinary_maps_used_size));
1287   fprintf (stderr, "Number of ordinary maps allocated:   " PRsa (5) "\n",
1288            SIZE_AMOUNT (s.num_ordinary_maps_allocated));
1289   fprintf (stderr, "Ordinary maps allocated size:        " PRsa (5) "\n",
1290            SIZE_AMOUNT (s.ordinary_maps_allocated_size));
1291   fprintf (stderr, "Number of macro maps used:           " PRsa (5) "\n",
1292            SIZE_AMOUNT (s.num_macro_maps_used));
1293   fprintf (stderr, "Macro maps used size:                " PRsa (5) "\n",
1294            SIZE_AMOUNT (s.macro_maps_used_size));
1295   fprintf (stderr, "Macro maps locations size:           " PRsa (5) "\n",
1296            SIZE_AMOUNT (s.macro_maps_locations_size));
1297   fprintf (stderr, "Macro maps size:                     " PRsa (5) "\n",
1298            SIZE_AMOUNT (macro_maps_size));
1299   fprintf (stderr, "Duplicated maps locations size:      " PRsa (5) "\n",
1300            SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
1301   fprintf (stderr, "Total allocated maps size:           " PRsa (5) "\n",
1302            SIZE_AMOUNT (total_allocated_map_size));
1303   fprintf (stderr, "Total used maps size:                " PRsa (5) "\n",
1304            SIZE_AMOUNT (total_used_map_size));
1305   fprintf (stderr, "Ad-hoc table size:                   " PRsa (5) "\n",
1306            SIZE_AMOUNT (s.adhoc_table_size));
1307   fprintf (stderr, "Ad-hoc table entries used:           " PRsa (5) "\n",
1308            SIZE_AMOUNT (s.adhoc_table_entries_used));
1309   fprintf (stderr, "optimized_ranges:                    " PRsa (5) "\n",
1310            SIZE_AMOUNT (line_table->m_num_optimized_ranges));
1311   fprintf (stderr, "unoptimized_ranges:                  " PRsa (5) "\n",
1312            SIZE_AMOUNT (line_table->m_num_unoptimized_ranges));
1313
1314   fprintf (stderr, "\n");
1315 }
1316
1317 /* Get location one beyond the final location in ordinary map IDX.  */
1318
1319 static location_t
1320 get_end_location (class line_maps *set, unsigned int idx)
1321 {
1322   if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1323     return set->highest_location;
1324
1325   struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1326   return MAP_START_LOCATION (next_map);
1327 }
1328
/* Helper function for write_digit_row.
   Write to STREAM the character for the last decimal digit of DIGIT,
   i.e. '0' plus DIGIT modulo 10.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1336
1337 /* Helper function for dump_location_info.
1338    Write a row of numbers to STREAM, numbering a source line,
1339    giving the units, tens, hundreds etc of the column number.  */
1340
1341 static void
1342 write_digit_row (FILE *stream, int indent,
1343                  const line_map_ordinary *map,
1344                  location_t loc, int max_col, int divisor)
1345 {
1346   fprintf (stream, "%*c", indent, ' ');
1347   fprintf (stream, "|");
1348   for (int column = 1; column < max_col; column++)
1349     {
1350       location_t column_loc = loc + (column << map->m_range_bits);
1351       write_digit (stream, column_loc / divisor);
1352     }
1353   fprintf (stream, "\n");
1354 }
1355
1356 /* Write a half-closed (START) / half-open (END) interval of
1357    location_t to STREAM.  */
1358
1359 static void
1360 dump_location_range (FILE *stream,
1361                      location_t start, location_t end)
1362 {
1363   fprintf (stream,
1364            "  location_t interval: %u <= loc < %u\n",
1365            start, end);
1366 }
1367
1368 /* Write a labelled description of a half-closed (START) / half-open (END)
1369    interval of location_t to STREAM.  */
1370
1371 static void
1372 dump_labelled_location_range (FILE *stream,
1373                               const char *name,
1374                               location_t start, location_t end)
1375 {
1376   fprintf (stream, "%s\n", name);
1377   dump_location_range (stream, start, end);
1378   fprintf (stream, "\n");
1379 }
1380
1381 /* Write a visualization of the locations in the line_table to STREAM.  */
1382
1383 void
1384 dump_location_info (FILE *stream)
1385 {
1386   /* Visualize the reserved locations.  */
1387   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1388                                 0, RESERVED_LOCATION_COUNT);
1389
1390   /* Visualize the ordinary line_map instances, rendering the sources. */
1391   for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1392     {
1393       location_t end_location = get_end_location (line_table, idx);
1394       /* half-closed: doesn't include this one. */
1395
1396       const line_map_ordinary *map
1397         = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1398       fprintf (stream, "ORDINARY MAP: %i\n", idx);
1399       dump_location_range (stream,
1400                            MAP_START_LOCATION (map), end_location);
1401       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1402       fprintf (stream, "  starting at line: %i\n",
1403                ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1404       fprintf (stream, "  column and range bits: %i\n",
1405                map->m_column_and_range_bits);
1406       fprintf (stream, "  column bits: %i\n",
1407                map->m_column_and_range_bits - map->m_range_bits);
1408       fprintf (stream, "  range bits: %i\n",
1409                map->m_range_bits);
1410       const char * reason;
1411       switch (map->reason) {
1412       case LC_ENTER:
1413         reason = "LC_ENTER";
1414         break;
1415       case LC_LEAVE:
1416         reason = "LC_LEAVE";
1417         break;
1418       case LC_RENAME:
1419         reason = "LC_RENAME";
1420         break;
1421       case LC_RENAME_VERBATIM:
1422         reason = "LC_RENAME_VERBATIM";
1423         break;
1424       case LC_ENTER_MACRO:
1425         reason = "LC_RENAME_MACRO";
1426         break;
1427       default:
1428         reason = "Unknown";
1429       }
1430       fprintf (stream, "  reason: %d (%s)\n", map->reason, reason);
1431
1432       const line_map_ordinary *includer_map
1433         = linemap_included_from_linemap (line_table, map);
1434       fprintf (stream, "  included from location: %d",
1435                linemap_included_from (map));
1436       if (includer_map) {
1437         fprintf (stream, " (in ordinary map %d)",
1438                  int (includer_map - line_table->info_ordinary.maps));
1439       }
1440       fprintf (stream, "\n");
1441
1442       /* Render the span of source lines that this "map" covers.  */
1443       for (location_t loc = MAP_START_LOCATION (map);
1444            loc < end_location;
1445            loc += (1 << map->m_range_bits) )
1446         {
1447           gcc_assert (pure_location_p (line_table, loc) );
1448
1449           expanded_location exploc
1450             = linemap_expand_location (line_table, map, loc);
1451
1452           if (exploc.column == 0)
1453             {
1454               /* Beginning of a new source line: draw the line.  */
1455
1456               char_span line_text = location_get_source_line (exploc.file,
1457                                                               exploc.line);
1458               if (!line_text)
1459                 break;
1460               fprintf (stream,
1461                        "%s:%3i|loc:%5i|%.*s\n",
1462                        exploc.file, exploc.line,
1463                        loc,
1464                        (int)line_text.length (), line_text.get_buffer ());
1465
1466               /* "loc" is at column 0, which means "the whole line".
1467                  Render the locations *within* the line, by underlining
1468                  it, showing the location_t numeric values
1469                  at each column.  */
1470               size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1471               if (max_col > line_text.length ())
1472                 max_col = line_text.length () + 1;
1473
1474               int len_lnum = num_digits (exploc.line);
1475               if (len_lnum < 3)
1476                 len_lnum = 3;
1477               int len_loc = num_digits (loc);
1478               if (len_loc < 5)
1479                 len_loc = 5;
1480
1481               int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1482
1483               /* Thousands.  */
1484               if (end_location > 999)
1485                 write_digit_row (stream, indent, map, loc, max_col, 1000);
1486
1487               /* Hundreds.  */
1488               if (end_location > 99)
1489                 write_digit_row (stream, indent, map, loc, max_col, 100);
1490
1491               /* Tens.  */
1492               write_digit_row (stream, indent, map, loc, max_col, 10);
1493
1494               /* Units.  */
1495               write_digit_row (stream, indent, map, loc, max_col, 1);
1496             }
1497         }
1498       fprintf (stream, "\n");
1499     }
1500
1501   /* Visualize unallocated values.  */
1502   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1503                                 line_table->highest_location,
1504                                 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1505
1506   /* Visualize the macro line_map instances, rendering the sources. */
1507   for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1508     {
1509       /* Each macro map that is allocated owns location_t values
1510          that are *lower* that the one before them.
1511          Hence it's meaningful to view them either in order of ascending
1512          source locations, or in order of ascending macro map index.  */
1513       const bool ascending_location_ts = true;
1514       unsigned int idx = (ascending_location_ts
1515                           ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1516                           : i);
1517       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1518       fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1519                idx,
1520                linemap_map_get_macro_name (map),
1521                MACRO_MAP_NUM_MACRO_TOKENS (map));
1522       dump_location_range (stream,
1523                            map->start_location,
1524                            (map->start_location
1525                             + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1526       inform (map->get_expansion_point_location (),
1527               "expansion point is location %i",
1528               map->get_expansion_point_location ());
1529       fprintf (stream, "  map->start_location: %u\n",
1530                map->start_location);
1531
1532       fprintf (stream, "  macro_locations:\n");
1533       for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1534         {
1535           location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1536           location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1537
1538           /* linemap_add_macro_token encodes token numbers in an expansion
1539              by putting them after MAP_START_LOCATION. */
1540
1541           /* I'm typically seeing 4 uninitialized entries at the end of
1542              0xafafafaf.
1543              This appears to be due to macro.cc:replace_args
1544              adding 2 extra args for padding tokens; presumably there may
1545              be a leading and/or trailing padding token injected,
1546              each for 2 more location slots.
1547              This would explain there being up to 4 location_ts slots
1548              that may be uninitialized.  */
1549
1550           fprintf (stream, "    %u: %u, %u\n",
1551                    i,
1552                    x,
1553                    y);
1554           if (x == y)
1555             {
1556               if (x < MAP_START_LOCATION (map))
1557                 inform (x, "token %u has %<x-location == y-location == %u%>",
1558                         i, x);
1559               else
1560                 fprintf (stream,
1561                          "x-location == y-location == %u encodes token # %u\n",
1562                          x, x - MAP_START_LOCATION (map));
1563                 }
1564           else
1565             {
1566               inform (x, "token %u has %<x-location == %u%>", i, x);
1567               inform (x, "token %u has %<y-location == %u%>", i, y);
1568             }
1569         }
1570       fprintf (stream, "\n");
1571     }
1572
1573   /* It appears that MAX_LOCATION_T itself is never assigned to a
1574      macro map, presumably due to an off-by-one error somewhere
1575      between the logic in linemap_enter_macro and
1576      LINEMAPS_MACRO_LOWEST_LOCATION.  */
1577   dump_labelled_location_range (stream, "MAX_LOCATION_T",
1578                                 MAX_LOCATION_T,
1579                                 MAX_LOCATION_T + 1);
1580
1581   /* Visualize ad-hoc values.  */
1582   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1583                                 MAX_LOCATION_T + 1, UINT_MAX);
1584 }
1585
1586 /* string_concat's constructor.  */
1587
1588 string_concat::string_concat (int num, location_t *locs)
1589   : m_num (num)
1590 {
1591   m_locs = ggc_vec_alloc <location_t> (num);
1592   for (int i = 0; i < num; i++)
1593     m_locs[i] = locs[i];
1594 }
1595
1596 /* string_concat_db's constructor.  */
1597
1598 string_concat_db::string_concat_db ()
1599 {
1600   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1601 }
1602
1603 /* Record that a string concatenation occurred, covering NUM
1604    string literal tokens.  LOCS is an array of size NUM, containing the
1605    locations of the tokens.  A copy of LOCS is taken.  */
1606
1607 void
1608 string_concat_db::record_string_concatenation (int num, location_t *locs)
1609 {
1610   gcc_assert (num > 1);
1611   gcc_assert (locs);
1612
1613   location_t key_loc = get_key_loc (locs[0]);
1614   /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:
1615      any data now recorded under key 'key_loc' would be overwritten by a
1616      subsequent call with the same key 'key_loc'.  */
1617   if (RESERVED_LOCATION_P (key_loc))
1618     return;
1619
1620   string_concat *concat
1621     = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1622   m_table->put (key_loc, concat);
1623 }
1624
1625 /* Determine if LOC was the location of the initial token of a
1626    concatenation of string literal tokens.
1627    If so, *OUT_NUM is written to with the number of tokens, and
1628    *OUT_LOCS with the location of an array of locations of the
1629    tokens, and return true.  *OUT_LOCS is a borrowed pointer to
1630    storage owned by the string_concat_db.
1631    Otherwise, return false.  */
1632
1633 bool
1634 string_concat_db::get_string_concatenation (location_t loc,
1635                                             int *out_num,
1636                                             location_t **out_locs)
1637 {
1638   gcc_assert (out_num);
1639   gcc_assert (out_locs);
1640
1641   location_t key_loc = get_key_loc (loc);
1642   /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see
1643      discussion in 'string_concat_db::record_string_concatenation'.  */
1644   if (RESERVED_LOCATION_P (key_loc))
1645     return false;
1646
1647   string_concat **concat = m_table->get (key_loc);
1648   if (!concat)
1649     return false;
1650
1651   *out_num = (*concat)->m_num;
1652   *out_locs =(*concat)->m_locs;
1653   return true;
1654 }
1655
1656 /* Internal function.  Canonicalize LOC into a form suitable for
1657    use as a key within the database, stripping away macro expansion,
1658    ad-hoc information, and range information, using the location of
1659    the start of LOC within an ordinary linemap.  */
1660
1661 location_t
1662 string_concat_db::get_key_loc (location_t loc)
1663 {
1664   loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1665                                   NULL);
1666
1667   loc = get_range_from_loc (line_table, loc).m_start;
1668
1669   return loc;
1670 }
1671
1672 /* Helper class for use within get_substring_ranges_for_loc.
1673    An vec of cpp_string with responsibility for releasing all of the
1674    str->text for each str in the vector.  */
1675
1676 class auto_cpp_string_vec :  public auto_vec <cpp_string>
1677 {
1678  public:
1679   auto_cpp_string_vec (int alloc)
1680     : auto_vec <cpp_string> (alloc) {}
1681
1682   ~auto_cpp_string_vec ()
1683   {
1684     /* Clean up the copies within this vec.  */
1685     int i;
1686     cpp_string *str;
1687     FOR_EACH_VEC_ELT (*this, i, str)
1688       free (const_cast <unsigned char *> (str->text));
1689   }
1690 };
1691
1692 /* Attempt to populate RANGES with source location information on the
1693    individual characters within the string literal found at STRLOC.
1694    If CONCATS is non-NULL, then any string literals that the token at
1695    STRLOC  was concatenated with are also added to RANGES.
1696
1697    Return NULL if successful, or an error message if any errors occurred (in
1698    which case RANGES may be only partially populated and should not
1699    be used).
1700
1701    This is implemented by re-parsing the relevant source line(s).  */
1702
1703 static const char *
1704 get_substring_ranges_for_loc (cpp_reader *pfile,
1705                               string_concat_db *concats,
1706                               location_t strloc,
1707                               enum cpp_ttype type,
1708                               cpp_substring_ranges &ranges)
1709 {
1710   gcc_assert (pfile);
1711
1712   if (strloc == UNKNOWN_LOCATION)
1713     return "unknown location";
1714
1715   /* Reparsing the strings requires accurate location information.
1716      If -ftrack-macro-expansion has been overridden from its default
1717      of 2, then we might have a location of a macro expansion point,
1718      rather than the location of the literal itself.
1719      Avoid this by requiring that we have full macro expansion tracking
1720      for substring locations to be available.  */
1721   if (cpp_get_options (pfile)->track_macro_expansion != 2)
1722     return "track_macro_expansion != 2";
1723
1724   /* If #line or # 44 "file"-style directives are present, then there's
1725      no guarantee that the line numbers we have can be used to locate
1726      the strings.  For example, we might have a .i file with # directives
1727      pointing back to lines within a .c file, but the .c file might
1728      have been edited since the .i file was created.
1729      In such a case, the safest course is to disable on-demand substring
1730      locations.  */
1731   if (line_table->seen_line_directive)
1732     return "seen line directive";
1733
1734   /* If string concatenation has occurred at STRLOC, get the locations
1735      of all of the literal tokens making up the compound string.
1736      Otherwise, just use STRLOC.  */
1737   int num_locs = 1;
1738   location_t *strlocs = &strloc;
1739   if (concats)
1740     concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1741
1742   auto_cpp_string_vec strs (num_locs);
1743   auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1744   for (int i = 0; i < num_locs; i++)
1745     {
1746       /* Get range of strloc.  We will use it to locate the start and finish
1747          of the literal token within the line.  */
1748       source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1749
1750       if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1751         {
1752           /* If the string token was within a macro expansion, then we can
1753              cope with it for the simple case where we have a single token.
1754              Otherwise, bail out.  */
1755           if (src_range.m_start != src_range.m_finish)
1756             return "macro expansion";
1757         }
1758       else
1759         {
1760           if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1761             /* If so, we can't reliably determine where the token started within
1762                its line.  */
1763             return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1764
1765           if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1766             /* If so, we can't reliably determine where the token finished
1767                within its line.  */
1768             return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1769         }
1770
1771       expanded_location start
1772         = expand_location_to_spelling_point (src_range.m_start,
1773                                              LOCATION_ASPECT_START);
1774       expanded_location finish
1775         = expand_location_to_spelling_point (src_range.m_finish,
1776                                              LOCATION_ASPECT_FINISH);
1777       if (start.file != finish.file)
1778         return "range endpoints are in different files";
1779       if (start.line != finish.line)
1780         return "range endpoints are on different lines";
1781       if (start.column > finish.column)
1782         return "range endpoints are reversed";
1783
1784       char_span line = location_get_source_line (start.file, start.line);
1785       if (!line)
1786         return "unable to read source line";
1787
1788       /* Determine the location of the literal (including quotes
1789          and leading prefix chars, such as the 'u' in a u""
1790          token).  */
1791       size_t literal_length = finish.column - start.column + 1;
1792
1793       /* Ensure that we don't crash if we got the wrong location.  */
1794       if (start.column < 1)
1795         return "zero start column";
1796       if (line.length () < (start.column - 1 + literal_length))
1797         return "line is not wide enough";
1798
1799       char_span literal = line.subspan (start.column - 1, literal_length);
1800
1801       cpp_string from;
1802       from.len = literal_length;
1803       /* Make a copy of the literal, to avoid having to rely on
1804          the lifetime of the copy of the line within the cache.
1805          This will be released by the auto_cpp_string_vec dtor.  */
1806       from.text = (unsigned char *)literal.xstrdup ();
1807       strs.safe_push (from);
1808
1809       /* For very long lines, a new linemap could have started
1810          halfway through the token.
1811          Ensure that the loc_reader uses the linemap of the
1812          *end* of the token for its start location.  */
1813       const line_map_ordinary *start_ord_map;
1814       linemap_resolve_location (line_table, src_range.m_start,
1815                                 LRK_SPELLING_LOCATION, &start_ord_map);
1816       const line_map_ordinary *final_ord_map;
1817       linemap_resolve_location (line_table, src_range.m_finish,
1818                                 LRK_SPELLING_LOCATION, &final_ord_map);
1819       if (start_ord_map == NULL || final_ord_map == NULL)
1820         return "failed to get ordinary maps";
1821       /* Bulletproofing.  We ought to only have different ordinary maps
1822          for start vs finish due to line-length jumps.  */
1823       if (start_ord_map != final_ord_map
1824           && start_ord_map->to_file != final_ord_map->to_file)
1825         return "start and finish are spelled in different ordinary maps";
1826       /* The file from linemap_resolve_location ought to match that from
1827          expand_location_to_spelling_point.  */
1828       if (start_ord_map->to_file != start.file)
1829         return "mismatching file after resolving linemap";
1830
1831       location_t start_loc
1832         = linemap_position_for_line_and_column (line_table, final_ord_map,
1833                                                 start.line, start.column);
1834
1835       cpp_string_location_reader loc_reader (start_loc, line_table);
1836       loc_readers.safe_push (loc_reader);
1837     }
1838
1839   /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
1840   const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1841                                                  loc_readers.address (),
1842                                                  num_locs, &ranges, type);
1843   if (err)
1844     return err;
1845
1846   /* Success: "ranges" should now contain information on the string.  */
1847   return NULL;
1848 }
1849
1850 /* Attempt to populate *OUT_LOC with source location information on the
1851    given characters within the string literal found at STRLOC.
1852    CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1853    character set.
1854
1855    For example, given CARET_IDX = 4, START_IDX = 3, END_IDX  = 7
1856    and string literal "012345\n789"
1857    *OUT_LOC is written to with:
1858      "012345\n789"
1859          ~^~~~~
1860
1861    If CONCATS is non-NULL, then any string literals that the token at
1862    STRLOC was concatenated with are also considered.
1863
1864    This is implemented by re-parsing the relevant source line(s).
1865
1866    Return NULL if successful, or an error message if any errors occurred.
1867    Error messages are intended for GCC developers (to help debugging) rather
1868    than for end-users.  */
1869
1870 const char *
1871 get_location_within_string (cpp_reader *pfile,
1872                             string_concat_db *concats,
1873                             location_t strloc,
1874                             enum cpp_ttype type,
1875                             int caret_idx, int start_idx, int end_idx,
1876                             location_t *out_loc)
1877 {
1878   gcc_checking_assert (caret_idx >= 0);
1879   gcc_checking_assert (start_idx >= 0);
1880   gcc_checking_assert (end_idx >= 0);
1881   gcc_assert (out_loc);
1882
1883   cpp_substring_ranges ranges;
1884   const char *err
1885     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1886   if (err)
1887     return err;
1888
1889   if (caret_idx >= ranges.get_num_ranges ())
1890     return "caret_idx out of range";
1891   if (start_idx >= ranges.get_num_ranges ())
1892     return "start_idx out of range";
1893   if (end_idx >= ranges.get_num_ranges ())
1894     return "end_idx out of range";
1895
1896   *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1897                             ranges.get_range (start_idx).m_start,
1898                             ranges.get_range (end_idx).m_finish);
1899   return NULL;
1900 }
1901
1902 /* Associate the DISCRIMINATOR with LOCUS, and return a new locus. */
1903
1904 location_t
1905 location_with_discriminator (location_t locus, int discriminator)
1906 {
1907   tree block = LOCATION_BLOCK (locus);
1908   source_range src_range = get_range_from_loc (line_table, locus);
1909   locus = get_pure_location (locus);
1910
1911   if (locus == UNKNOWN_LOCATION)
1912     return locus;
1913
1914   return line_table->get_or_create_combined_loc (locus, src_range, block,
1915                                                  discriminator);
1916 }
1917
1918 /* Return TRUE if LOCUS represents a location with a discriminator.  */
1919
1920 bool
1921 has_discriminator (location_t locus)
1922 {
1923   return get_discriminator_from_loc (locus) != 0;
1924 }
1925
1926 /* Return the discriminator for LOCUS.  */
1927
1928 int
1929 get_discriminator_from_loc (location_t locus)
1930 {
1931   return get_discriminator_from_loc (line_table, locus);
1932 }
1933
1934 #if CHECKING_P
1935
1936 namespace selftest {
1937
1938 /* Selftests of location handling.  */
1939
1940 /* Attempt to populate *OUT_RANGE with source location information on the
1941    given character within the string literal found at STRLOC.
1942    CHAR_IDX refers to an offset within the execution character set.
1943    If CONCATS is non-NULL, then any string literals that the token at
1944    STRLOC was concatenated with are also considered.
1945
1946    This is implemented by re-parsing the relevant source line(s).
1947
1948    Return NULL if successful, or an error message if any errors occurred.
1949    Error messages are intended for GCC developers (to help debugging) rather
1950    than for end-users.  */
1951
1952 static const char *
1953 get_source_range_for_char (cpp_reader *pfile,
1954                            string_concat_db *concats,
1955                            location_t strloc,
1956                            enum cpp_ttype type,
1957                            int char_idx,
1958                            source_range *out_range)
1959 {
1960   gcc_checking_assert (char_idx >= 0);
1961   gcc_assert (out_range);
1962
1963   cpp_substring_ranges ranges;
1964   const char *err
1965     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1966   if (err)
1967     return err;
1968
1969   if (char_idx >= ranges.get_num_ranges ())
1970     return "char_idx out of range";
1971
1972   *out_range = ranges.get_range (char_idx);
1973   return NULL;
1974 }
1975
1976 /* As get_source_range_for_char, but write to *OUT the number
1977    of ranges that are available.  */
1978
1979 static const char *
1980 get_num_source_ranges_for_substring (cpp_reader *pfile,
1981                                      string_concat_db *concats,
1982                                      location_t strloc,
1983                                      enum cpp_ttype type,
1984                                      int *out)
1985 {
1986   gcc_assert (out);
1987
1988   cpp_substring_ranges ranges;
1989   const char *err
1990     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1991
1992   if (err)
1993     return err;
1994
1995   *out = ranges.get_num_ranges ();
1996   return NULL;
1997 }
1998
1999 /* Selftests of location handling.  */
2000
2001 /* Verify that compare() on linenum_type handles comparisons over the full
2002    range of the type.  */
2003
2004 static void
2005 test_linenum_comparisons ()
2006 {
2007   linenum_type min_line (0);
2008   linenum_type max_line (0xffffffff);
2009   ASSERT_EQ (0, compare (min_line, min_line));
2010   ASSERT_EQ (0, compare (max_line, max_line));
2011
2012   ASSERT_GT (compare (max_line, min_line), 0);
2013   ASSERT_LT (compare (min_line, max_line), 0);
2014 }
2015
2016 /* Helper function for verifying location data: when location_t
2017    values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
2018    as having column 0.  */
2019
2020 static bool
2021 should_have_column_data_p (location_t loc)
2022 {
2023   if (IS_ADHOC_LOC (loc))
2024     loc = get_location_from_adhoc_loc (line_table, loc);
2025   if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
2026     return false;
2027   return true;
2028 }
2029
2030 /* Selftest for should_have_column_data_p.  */
2031
2032 static void
2033 test_should_have_column_data_p ()
2034 {
2035   ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
2036   ASSERT_TRUE
2037     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
2038   ASSERT_FALSE
2039     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
2040 }
2041
2042 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
2043    on LOC.  */
2044
2045 static void
2046 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
2047               location_t loc)
2048 {
2049   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
2050   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
2051   /* If location_t values are sufficiently high, then column numbers
2052      will be unavailable and LOCATION_COLUMN (loc) will be 0.
2053      When close to the threshold, column numbers *may* be present: if
2054      the final linemap before the threshold contains a line that straddles
2055      the threshold, locations in that line have column information.  */
2056   if (should_have_column_data_p (loc))
2057     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
2058 }
2059
2060 /* Various selftests involve constructing a line table and one or more
2061    line maps within it.
2062
2063    For maximum test coverage we want to run these tests with a variety
2064    of situations:
2065    - line_table->default_range_bits: some frontends use a non-zero value
2066    and others use zero
2067    - the fallback modes within line-map.cc: there are various threshold
   values for location_t beyond which line-map.cc changes
2069    behavior (disabling of the range-packing optimization, disabling
2070    of column-tracking).  We can exercise these by starting the line_table
2071    at interesting values at or near these thresholds.
2072
2073    The following struct describes a particular case within our test
2074    matrix.  */
2075
class line_table_case
{
public:
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* Value to install as line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If nonzero, the value given to both line_table->highest_location
     and line_table->highest_line when the test table is set up.  */
  int m_base_location;
};
2087
2088 /* Constructor.  Store the old value of line_table, and create a new
2089    one, using sane defaults.  */
2090
2091 line_table_test::line_table_test ()
2092 {
2093   gcc_assert (saved_line_table == NULL);
2094   saved_line_table = line_table;
2095   line_table = ggc_alloc<line_maps> ();
2096   linemap_init (line_table, BUILTINS_LOCATION);
2097   gcc_assert (saved_line_table->m_reallocator);
2098   line_table->m_reallocator = saved_line_table->m_reallocator;
2099   gcc_assert (saved_line_table->m_round_alloc_size);
2100   line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
2101   line_table->default_range_bits = 0;
2102 }
2103
2104 /* Constructor.  Store the old value of line_table, and create a new
2105    one, using the sitation described in CASE_.  */
2106
2107 line_table_test::line_table_test (const line_table_case &case_)
2108 {
2109   gcc_assert (saved_line_table == NULL);
2110   saved_line_table = line_table;
2111   line_table = ggc_alloc<line_maps> ();
2112   linemap_init (line_table, BUILTINS_LOCATION);
2113   gcc_assert (saved_line_table->m_reallocator);
2114   line_table->m_reallocator = saved_line_table->m_reallocator;
2115   gcc_assert (saved_line_table->m_round_alloc_size);
2116   line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
2117   line_table->default_range_bits = case_.m_default_range_bits;
2118   if (case_.m_base_location)
2119     {
2120       line_table->highest_location = case_.m_base_location;
2121       line_table->highest_line = case_.m_base_location;
2122     }
2123 }
2124
2125 /* Destructor.  Restore the old value of line_table.  */
2126
2127 line_table_test::~line_table_test ()
2128 {
2129   gcc_assert (saved_line_table != NULL);
2130   line_table = saved_line_table;
2131   saved_line_table = NULL;
2132 }
2133
2134 /* Verify basic operation of ordinary linemaps.  */
2135
2136 static void
2137 test_accessing_ordinary_linemaps (const line_table_case &case_)
2138 {
2139   line_table_test ltt (case_);
2140
2141   /* Build a simple linemap describing some locations. */
2142   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
2143
2144   linemap_line_start (line_table, 1, 100);
2145   location_t loc_a = linemap_position_for_column (line_table, 1);
2146   location_t loc_b = linemap_position_for_column (line_table, 23);
2147
2148   linemap_line_start (line_table, 2, 100);
2149   location_t loc_c = linemap_position_for_column (line_table, 1);
2150   location_t loc_d = linemap_position_for_column (line_table, 17);
2151
2152   /* Example of a very long line.  */
2153   linemap_line_start (line_table, 3, 2000);
2154   location_t loc_e = linemap_position_for_column (line_table, 700);
2155
2156   /* Transitioning back to a short line.  */
2157   linemap_line_start (line_table, 4, 0);
2158   location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
2159
2160   if (should_have_column_data_p (loc_back_to_short))
2161     {
2162       /* Verify that we switched to short lines in the linemap.  */
2163       line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
2164       ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
2165     }
2166
2167   /* Example of a line that will eventually be seen to be longer
2168      than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
2169      below that.  */
2170   linemap_line_start (line_table, 5, 2000);
2171
2172   location_t loc_start_of_very_long_line
2173     = linemap_position_for_column (line_table, 2000);
2174   location_t loc_too_wide
2175     = linemap_position_for_column (line_table, 4097);
2176   location_t loc_too_wide_2
2177     = linemap_position_for_column (line_table, 4098);
2178
2179   /* ...and back to a sane line length.  */
2180   linemap_line_start (line_table, 6, 100);
2181   location_t loc_sane_again = linemap_position_for_column (line_table, 10);
2182
2183   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
2184
2185   /* Multiple files.  */
2186   linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
2187   linemap_line_start (line_table, 1, 200);
2188   location_t loc_f = linemap_position_for_column (line_table, 150);
2189   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
2190
2191   /* Verify that we can recover the location info.  */
2192   assert_loceq ("foo.c", 1, 1, loc_a);
2193   assert_loceq ("foo.c", 1, 23, loc_b);
2194   assert_loceq ("foo.c", 2, 1, loc_c);
2195   assert_loceq ("foo.c", 2, 17, loc_d);
2196   assert_loceq ("foo.c", 3, 700, loc_e);
2197   assert_loceq ("foo.c", 4, 100, loc_back_to_short);
2198
2199   /* In the very wide line, the initial location should be fully tracked.  */
2200   assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
2201   /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
2202      be disabled.  */
2203   assert_loceq ("foo.c", 5, 0, loc_too_wide);
2204   assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
2205   /*...and column-tracking should be re-enabled for subsequent lines.  */
2206   assert_loceq ("foo.c", 6, 10, loc_sane_again);
2207
2208   assert_loceq ("bar.c", 1, 150, loc_f);
2209
2210   ASSERT_FALSE (is_location_from_builtin_token (loc_a));
2211   ASSERT_TRUE (pure_location_p (line_table, loc_a));
2212
2213   /* Verify using make_location to build a range, and extracting data
2214      back from it.  */
2215   location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
2216   ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
2217   ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
2218   source_range src_range = get_range_from_loc (line_table, range_c_b_d);
2219   ASSERT_EQ (loc_b, src_range.m_start);
2220   ASSERT_EQ (loc_d, src_range.m_finish);
2221 }
2222
2223 /* Verify various properties of UNKNOWN_LOCATION.  */
2224
2225 static void
2226 test_unknown_location ()
2227 {
2228   ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
2229   ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
2230   ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
2231 }
2232
2233 /* Verify various properties of BUILTINS_LOCATION.  */
2234
2235 static void
2236 test_builtins ()
2237 {
2238   assert_loceq (special_fname_builtin (), 0, 0, BUILTINS_LOCATION);
2239   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
2240 }
2241
2242 /* Regression test for make_location.
2243    Ensure that we use pure locations for the start/finish of the range,
2244    rather than storing a packed or ad-hoc range as the start/finish.  */
2245
2246 static void
2247 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
2248 {
2249   /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
2250      with C++ frontend.
2251      ....................0000000001111111111222.
2252      ....................1234567890123456789012.  */
2253   const char *content = "     r += !aaa == bbb;\n";
2254   temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
2255   line_table_test ltt (case_);
2256   linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
2257
2258   const location_t c11 = linemap_position_for_column (line_table, 11);
2259   const location_t c12 = linemap_position_for_column (line_table, 12);
2260   const location_t c13 = linemap_position_for_column (line_table, 13);
2261   const location_t c14 = linemap_position_for_column (line_table, 14);
2262   const location_t c21 = linemap_position_for_column (line_table, 21);
2263
2264   if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
2265     return;
2266
2267   /* Use column 13 for the caret location, arbitrarily, to verify that we
2268      handle start != caret.  */
2269   const location_t aaa = make_location (c13, c12, c14);
2270   ASSERT_EQ (c13, get_pure_location (aaa));
2271   ASSERT_EQ (c12, get_start (aaa));
2272   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
2273   ASSERT_EQ (c14, get_finish (aaa));
2274   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
2275
2276   /* Make a location using a location with a range as the start-point.  */
2277   const location_t not_aaa = make_location (c11, aaa, c14);
2278   ASSERT_EQ (c11, get_pure_location (not_aaa));
2279   /* It should use the start location of the range, not store the range
2280      itself.  */
2281   ASSERT_EQ (c12, get_start (not_aaa));
2282   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
2283   ASSERT_EQ (c14, get_finish (not_aaa));
2284   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
2285
2286   /* Similarly, make a location with a range as the end-point.  */
2287   const location_t aaa_eq_bbb = make_location (c12, c12, c21);
2288   ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
2289   ASSERT_EQ (c12, get_start (aaa_eq_bbb));
2290   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
2291   ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
2292   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
2293   const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
2294   /* It should use the finish location of the range, not store the range
2295      itself.  */
2296   ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
2297   ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
2298   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
2299   ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
2300   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
2301 }
2302
2303 /* Verify reading of input files (e.g. for caret-based diagnostics).  */
2304
2305 static void
2306 test_reading_source_line ()
2307 {
2308   /* Create a tempfile and write some text to it.  */
2309   temp_source_file tmp (SELFTEST_LOCATION, ".txt",
2310                         "01234567890123456789\n"
2311                         "This is the test text\n"
2312                         "This is the 3rd line");
2313
2314   /* Read back a specific line from the tempfile.  */
2315   char_span source_line = location_get_source_line (tmp.get_filename (), 3);
2316   ASSERT_TRUE (source_line);
2317   ASSERT_TRUE (source_line.get_buffer () != NULL);
2318   ASSERT_EQ (20, source_line.length ());
2319   ASSERT_TRUE (!strncmp ("This is the 3rd line",
2320                          source_line.get_buffer (), source_line.length ()));
2321
2322   source_line = location_get_source_line (tmp.get_filename (), 2);
2323   ASSERT_TRUE (source_line);
2324   ASSERT_TRUE (source_line.get_buffer () != NULL);
2325   ASSERT_EQ (21, source_line.length ());
2326   ASSERT_TRUE (!strncmp ("This is the test text",
2327                          source_line.get_buffer (), source_line.length ()));
2328
2329   source_line = location_get_source_line (tmp.get_filename (), 4);
2330   ASSERT_FALSE (source_line);
2331   ASSERT_TRUE (source_line.get_buffer () == NULL);
2332 }
2333
2334 /* Tests of lexing.  */
2335
/* Assert that cpp_token_as_text on token TOK from PARSER gives
   EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *token_text_                                          \
      = cpp_token_as_text ((PARSER), (TOK));                            \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)token_text_);          \
  SELFTEST_END_STMT
2344
2345 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
2346    and ranges from EXP_START_COL to EXP_FINISH_COL.
2347    Use LOC as the effective location of the selftest.  */
2348
2349 static void
2350 assert_token_loc_eq (const location &loc,
2351                      const cpp_token *tok,
2352                      const char *exp_filename, int exp_linenum,
2353                      int exp_start_col, int exp_finish_col)
2354 {
2355   location_t tok_loc = tok->src_loc;
2356   ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
2357   ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
2358
2359   /* If location_t values are sufficiently high, then column numbers
2360      will be unavailable.  */
2361   if (!should_have_column_data_p (tok_loc))
2362     return;
2363
2364   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2365   source_range tok_range = get_range_from_loc (line_table, tok_loc);
2366   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2367   ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
2368 }
2369
/* Check TOK->src_loc via assert_token_loc_eq, passing
   SELFTEST_LOCATION (i.e. the point of use of this macro) as the
   effective location of the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,     \
                            EXP_START_COL, EXP_FINISH_COL)      \
  assert_token_loc_eq (SELFTEST_LOCATION,                       \
                       (TOK),                                   \
                       (EXP_FILENAME), (EXP_LINENUM),           \
                       (EXP_START_COL), (EXP_FINISH_COL))
2377
2378 /* Test of lexing a file using libcpp, verifying tokens and their
2379    location information.  */
2380
2381 static void
2382 test_lexer (const line_table_case &case_)
2383 {
2384   /* Create a tempfile and write some text to it.  */
2385   const char *content =
2386     /*00000000011111111112222222222333333.3333444444444.455555555556
2387       12345678901234567890123456789012345.6789012345678.901234567890.  */
2388     ("test_name /* c-style comment */\n"
2389      "                                  \"test literal\"\n"
2390      " // test c++-style comment\n"
2391      "   42\n");
2392   temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2393
2394   line_table_test ltt (case_);
2395
2396   cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2397
2398   const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2399   ASSERT_NE (fname, NULL);
2400
2401   /* Verify that we get the expected tokens back, with the correct
2402      location information.  */
2403
2404   location_t loc;
2405   const cpp_token *tok;
2406   tok = cpp_get_token_with_location (parser, &loc);
2407   ASSERT_NE (tok, NULL);
2408   ASSERT_EQ (tok->type, CPP_NAME);
2409   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2410   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2411
2412   tok = cpp_get_token_with_location (parser, &loc);
2413   ASSERT_NE (tok, NULL);
2414   ASSERT_EQ (tok->type, CPP_STRING);
2415   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2416   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2417
2418   tok = cpp_get_token_with_location (parser, &loc);
2419   ASSERT_NE (tok, NULL);
2420   ASSERT_EQ (tok->type, CPP_NUMBER);
2421   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2422   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2423
2424   tok = cpp_get_token_with_location (parser, &loc);
2425   ASSERT_NE (tok, NULL);
2426   ASSERT_EQ (tok->type, CPP_EOF);
2427
2428   cpp_finish (parser, NULL);
2429   cpp_destroy (parser);
2430 }
2431
2432 /* Forward decls.  */
2433
2434 class lexer_test;
2435 class lexer_test_options;
2436
2437 /* A class for specifying options of a lexer_test.
2438    The "apply" vfunc is called during the lexer_test constructor.  */
2439
2440 class lexer_test_options
2441 {
2442  public:
2443   virtual void apply (lexer_test &) = 0;
2444 };
2445
2446 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2447    in its dtor.
2448
2449    This is needed by struct lexer_test to ensure that the cleanup of the
2450    cpp_reader happens *after* the cleanup of the temp_source_file.  */
2451
2452 class cpp_reader_ptr
2453 {
2454  public:
2455   cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2456
2457   ~cpp_reader_ptr ()
2458   {
2459     cpp_finish (m_ptr, NULL);
2460     cpp_destroy (m_ptr);
2461   }
2462
2463   operator cpp_reader * () const { return m_ptr; }
2464
2465  private:
2466   cpp_reader *m_ptr;
2467 };
2468
/* A class for writing lexer tests.
   It bundles a line_table_test, a cpp_reader, a temp_source_file and a
   string_concat_db, all of which are cleaned up when the lexer_test
   goes out of scope.  */

class lexer_test
{
public:
  /* CASE_ selects the line-table configuration and CONTENT is the text
     for the test; OPTIONS has its "apply" vfunc called during
     construction.
     NOTE(review): whether OPTIONS may be NULL is decided by the
     constructor body, which is defined elsewhere.  */
  lexer_test (const line_table_case &case_, const char *content,
              lexer_test_options *options);
  ~lexer_test ();

  /* Presumably fetches the next token from m_parser; the definition is
     elsewhere in the file.  */
  const cpp_token *get_token ();

  /* The ordering of these fields matters, because members are
     constructed in declaration order and destroyed in the reverse
     order.
     The line_table_test must be first, since the cpp_reader_ptr
     uses it.
     The cpp_reader must be cleaned up *after* the temp_source_file
     since the filenames in input.cc's input cache are owned by the
     cpp_reader; in particular, when ~temp_source_file evicts the
     filename the filenames must still be alive.  */
  line_table_test m_ltt;
  cpp_reader_ptr m_parser;
  temp_source_file m_tempfile;
  string_concat_db m_concats;
  bool m_implicitly_expect_EOF;
};
2493
2494 /* Use an EBCDIC encoding for the execution charset, specifically
2495    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2496
2497    This exercises iconv integration within libcpp.
2498    Not every build of iconv supports the given charset,
2499    so we need to flag this error and handle it gracefully.  */
2500
2501 class ebcdic_execution_charset : public lexer_test_options
2502 {
2503  public:
2504   ebcdic_execution_charset () : m_num_iconv_errors (0)
2505     {
2506       gcc_assert (s_singleton == NULL);
2507       s_singleton = this;
2508     }
2509   ~ebcdic_execution_charset ()
2510     {
2511       gcc_assert (s_singleton == this);
2512       s_singleton = NULL;
2513     }
2514
2515   void apply (lexer_test &test) final override
2516   {
2517     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2518     cpp_opts->narrow_charset = "IBM1047";
2519
2520     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2521     callbacks->diagnostic = on_diagnostic;
2522   }
2523
2524   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2525                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2526                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2527                              rich_location *richloc ATTRIBUTE_UNUSED,
2528                              const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2529     ATTRIBUTE_FPTR_PRINTF(5,0)
2530   {
2531     gcc_assert (s_singleton);
2532     /* Avoid exgettext from picking this up, it is translated in libcpp.  */
2533     const char *msg = "conversion from %s to %s not supported by iconv";
2534 #ifdef ENABLE_NLS
2535     msg = dgettext ("cpplib", msg);
2536 #endif
2537     /* Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc
2538        when the local iconv build doesn't support the conversion.  */
2539     if (strcmp (msgid, msg) == 0)
2540       {
2541         s_singleton->m_num_iconv_errors++;
2542         return true;
2543       }
2544
2545     /* Otherwise, we have an unexpected error.  */
2546     abort ();
2547   }
2548
2549   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2550
2551  private:
2552   static ebcdic_execution_charset *s_singleton;
2553   int m_num_iconv_errors;
2554 };
2555
2556 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2557
2558 /* A lexer_test_options subclass that records a list of diagnostic
2559    messages emitted by the lexer.  */
2560
2561 class lexer_diagnostic_sink : public lexer_test_options
2562 {
2563  public:
2564   lexer_diagnostic_sink ()
2565   {
2566     gcc_assert (s_singleton == NULL);
2567     s_singleton = this;
2568   }
2569   ~lexer_diagnostic_sink ()
2570   {
2571     gcc_assert (s_singleton == this);
2572     s_singleton = NULL;
2573
2574     int i;
2575     char *str;
2576     FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2577       free (str);
2578   }
2579
2580   void apply (lexer_test &test) final override
2581   {
2582     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2583     callbacks->diagnostic = on_diagnostic;
2584   }
2585
2586   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2587                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2588                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2589                              rich_location *richloc ATTRIBUTE_UNUSED,
2590                              const char *msgid, va_list *ap)
2591     ATTRIBUTE_FPTR_PRINTF(5,0)
2592   {
2593     char *msg = xvasprintf (msgid, *ap);
2594     s_singleton->m_diagnostics.safe_push (msg);
2595     return true;
2596   }
2597
2598   auto_vec<char *> m_diagnostics;
2599
2600  private:
2601   static lexer_diagnostic_sink *s_singleton;
2602 };
2603
2604 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2605
2606 /* Constructor.  Override line_table with a new instance based on CASE_,
2607    and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
2608    start parsing the tempfile.  */
2609
2610 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2611                         lexer_test_options *options)
2612 : m_ltt (case_),
2613   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2614   /* Create a tempfile and write the text to it.  */
2615   m_tempfile (SELFTEST_LOCATION, ".c", content),
2616   m_concats (),
2617   m_implicitly_expect_EOF (true)
2618 {
2619   if (options)
2620     options->apply (*this);
2621
2622   cpp_init_iconv (m_parser);
2623
2624   /* Parse the file.  */
2625   const char *fname = cpp_read_main_file (m_parser,
2626                                           m_tempfile.get_filename ());
2627   ASSERT_NE (fname, NULL);
2628 }
2629
2630 /* Destructor.  By default, verify that the next token in m_parser is EOF.  */
2631
2632 lexer_test::~lexer_test ()
2633 {
2634   location_t loc;
2635   const cpp_token *tok;
2636
2637   if (m_implicitly_expect_EOF)
2638     {
2639       tok = cpp_get_token_with_location (m_parser, &loc);
2640       ASSERT_NE (tok, NULL);
2641       ASSERT_EQ (tok->type, CPP_EOF);
2642     }
2643 }
2644
2645 /* Get the next token from m_parser.  */
2646
2647 const cpp_token *
2648 lexer_test::get_token ()
2649 {
2650   location_t loc;
2651   const cpp_token *tok;
2652
2653   tok = cpp_get_token_with_location (m_parser, &loc);
2654   ASSERT_NE (tok, NULL);
2655   return tok;
2656 }
2657
2658 /* Verify that locations within string literals are correctly handled.  */
2659
2660 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2661    using the string concatenation database for TEST.
2662
2663    Assert that the character at index IDX is on EXPECTED_LINE,
2664    and that it begins at column EXPECTED_START_COL and ends at
2665    EXPECTED_FINISH_COL (unless the locations are beyond
2666    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2667    columns).  */
2668
2669 static void
2670 assert_char_at_range (const location &loc,
2671                       lexer_test& test,
2672                       location_t strloc, enum cpp_ttype type, int idx,
2673                       int expected_line, int expected_start_col,
2674                       int expected_finish_col)
2675 {
2676   cpp_reader *pfile = test.m_parser;
2677   string_concat_db *concats = &test.m_concats;
2678
2679   source_range actual_range = source_range();
2680   const char *err
2681     = get_source_range_for_char (pfile, concats, strloc, type, idx,
2682                                  &actual_range);
2683   if (should_have_column_data_p (strloc))
2684     ASSERT_EQ_AT (loc, NULL, err);
2685   else
2686     {
2687       ASSERT_STREQ_AT (loc,
2688                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2689                        err);
2690       return;
2691     }
2692
2693   int actual_start_line = LOCATION_LINE (actual_range.m_start);
2694   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2695   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2696   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2697
2698   if (should_have_column_data_p (actual_range.m_start))
2699     {
2700       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2701       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2702     }
2703   if (should_have_column_data_p (actual_range.m_finish))
2704     {
2705       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2706       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2707     }
2708 }
2709
2710 /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2711    the effective location of any errors.  */
2712
2713 #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2714                              EXPECTED_START_COL, EXPECTED_FINISH_COL)   \
2715   assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2716                         (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2717                         (EXPECTED_FINISH_COL))
2718
2719 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2720    using the string concatenation database for TEST.
2721
2722    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES.  */
2723
2724 static void
2725 assert_num_substring_ranges (const location &loc,
2726                              lexer_test& test,
2727                              location_t strloc,
2728                              enum cpp_ttype type,
2729                              int expected_num_ranges)
2730 {
2731   cpp_reader *pfile = test.m_parser;
2732   string_concat_db *concats = &test.m_concats;
2733
2734   int actual_num_ranges = -1;
2735   const char *err
2736     = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2737                                            &actual_num_ranges);
2738   if (should_have_column_data_p (strloc))
2739     ASSERT_EQ_AT (loc, NULL, err);
2740   else
2741     {
2742       ASSERT_STREQ_AT (loc,
2743                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2744                        err);
2745       return;
2746     }
2747   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2748 }
2749
2750 /* Macro for calling assert_num_substring_ranges, supplying
2751    SELFTEST_LOCATION for the effective location of any errors.  */
2752
2753 #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2754                                     EXPECTED_NUM_RANGES)                \
2755   assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2756                                (TYPE), (EXPECTED_NUM_RANGES))
2757
2758
2759 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2760    returns an error (using the string concatenation database for TEST).  */
2761
2762 static void
2763 assert_has_no_substring_ranges (const location &loc,
2764                                 lexer_test& test,
2765                                 location_t strloc,
2766                                 enum cpp_ttype type,
2767                                 const char *expected_err)
2768 {
2769   cpp_reader *pfile = test.m_parser;
2770   string_concat_db *concats = &test.m_concats;
2771   cpp_substring_ranges ranges;
2772   const char *actual_err
2773     = get_substring_ranges_for_loc (pfile, concats, strloc,
2774                                     type, ranges);
2775   if (should_have_column_data_p (strloc))
2776     ASSERT_STREQ_AT (loc, expected_err, actual_err);
2777   else
2778     ASSERT_STREQ_AT (loc,
2779                      "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2780                      actual_err);
2781 }
2782
2783 #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)    \
2784     assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2785                                     (STRLOC), (TYPE), (ERR))
2786
2787 /* Lex a simple string literal.  Verify the substring location data, before
2788    and after running cpp_interpret_string on it.  */
2789
2790 static void
2791 test_lexer_string_locations_simple (const line_table_case &case_)
2792 {
2793   /* Digits 0-9 (with 0 at column 10), the simple way.
2794      ....................000000000.11111111112.2222222223333333333
2795      ....................123456789.01234567890.1234567890123456789
2796      We add a trailing comment to ensure that we correctly locate
2797      the end of the string literal token.  */
2798   const char *content = "        \"0123456789\" /* not a string */\n";
2799   lexer_test test (case_, content, NULL);
2800
2801   /* Verify that we get the expected token back, with the correct
2802      location information.  */
2803   const cpp_token *tok = test.get_token ();
2804   ASSERT_EQ (tok->type, CPP_STRING);
2805   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2806   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2807
2808   /* At this point in lexing, the quote characters are treated as part of
2809      the string (they are stripped off by cpp_interpret_string).  */
2810
2811   ASSERT_EQ (tok->val.str.len, 12);
2812
2813   /* Verify that cpp_interpret_string works.  */
2814   cpp_string dst_string;
2815   const enum cpp_ttype type = CPP_STRING;
2816   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2817                                       &dst_string, type);
2818   ASSERT_TRUE (result);
2819   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2820   free (const_cast <unsigned char *> (dst_string.text));
2821
2822   /* Verify ranges of individual characters.  This no longer includes the
2823      opening quote, but does include the closing quote.  */
2824   for (int i = 0; i <= 10; i++)
2825     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2826                           10 + i, 10 + i);
2827
2828   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2829 }
2830
2831 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2832    encoding.  */
2833
2834 static void
2835 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2836 {
2837   /* EBCDIC support requires iconv.  */
2838   if (!HAVE_ICONV)
2839     return;
2840
2841   /* Digits 0-9 (with 0 at column 10), the simple way.
2842      ....................000000000.11111111112.2222222223333333333
2843      ....................123456789.01234567890.1234567890123456789
2844      We add a trailing comment to ensure that we correctly locate
2845      the end of the string literal token.  */
2846   const char *content = "        \"0123456789\" /* not a string */\n";
2847   ebcdic_execution_charset use_ebcdic;
2848   lexer_test test (case_, content, &use_ebcdic);
2849
2850   /* Verify that we get the expected token back, with the correct
2851      location information.  */
2852   const cpp_token *tok = test.get_token ();
2853   ASSERT_EQ (tok->type, CPP_STRING);
2854   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2855   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2856
2857   /* At this point in lexing, the quote characters are treated as part of
2858      the string (they are stripped off by cpp_interpret_string).  */
2859
2860   ASSERT_EQ (tok->val.str.len, 12);
2861
2862   /* The remainder of the test requires an iconv implementation that
2863      can convert from UTF-8 to the EBCDIC encoding requested above.  */
2864   if (use_ebcdic.iconv_errors_occurred_p ())
2865     return;
2866
2867   /* Verify that cpp_interpret_string works.  */
2868   cpp_string dst_string;
2869   const enum cpp_ttype type = CPP_STRING;
2870   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2871                                       &dst_string, type);
2872   ASSERT_TRUE (result);
2873   /* We should now have EBCDIC-encoded text, specifically
2874      IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2875      The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
2876   ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2877                 (const char *)dst_string.text);
2878   free (const_cast <unsigned char *> (dst_string.text));
2879
2880   /* Verify that we don't attempt to record substring location information
2881      for such cases.  */
2882   ASSERT_HAS_NO_SUBSTRING_RANGES
2883     (test, tok->src_loc, type,
2884      "execution character set != source character set");
2885 }
2886
2887 /* Lex a string literal containing a hex-escaped character.
2888    Verify the substring location data, before and after running
2889    cpp_interpret_string on it.  */
2890
2891 static void
2892 test_lexer_string_locations_hex (const line_table_case &case_)
2893 {
2894   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2895      and with a space in place of digit 6, to terminate the escaped
2896      hex code.
2897      ....................000000000.111111.11112222.
2898      ....................123456789.012345.67890123.  */
2899   const char *content = "        \"01234\\x35 789\"\n";
2900   lexer_test test (case_, content, NULL);
2901
2902   /* Verify that we get the expected token back, with the correct
2903      location information.  */
2904   const cpp_token *tok = test.get_token ();
2905   ASSERT_EQ (tok->type, CPP_STRING);
2906   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2907   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2908
2909   /* At this point in lexing, the quote characters are treated as part of
2910      the string (they are stripped off by cpp_interpret_string).  */
2911   ASSERT_EQ (tok->val.str.len, 15);
2912
2913   /* Verify that cpp_interpret_string works.  */
2914   cpp_string dst_string;
2915   const enum cpp_ttype type = CPP_STRING;
2916   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2917                                       &dst_string, type);
2918   ASSERT_TRUE (result);
2919   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2920   free (const_cast <unsigned char *> (dst_string.text));
2921
2922   /* Verify ranges of individual characters.  This no longer includes the
2923      opening quote, but does include the closing quote.  */
2924   for (int i = 0; i <= 4; i++)
2925     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2926   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2927   for (int i = 6; i <= 10; i++)
2928     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2929
2930   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2931 }
2932
2933 /* Lex a string literal containing an octal-escaped character.
2934    Verify the substring location data after running cpp_interpret_string
2935    on it.  */
2936
2937 static void
2938 test_lexer_string_locations_oct (const line_table_case &case_)
2939 {
2940   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2941      and with a space in place of digit 6, to terminate the escaped
2942      octal code.
2943      ....................000000000.111111.11112222.2222223333333333444
2944      ....................123456789.012345.67890123.4567890123456789012  */
2945   const char *content = "        \"01234\\065 789\" /* not a string */\n";
2946   lexer_test test (case_, content, NULL);
2947
2948   /* Verify that we get the expected token back, with the correct
2949      location information.  */
2950   const cpp_token *tok = test.get_token ();
2951   ASSERT_EQ (tok->type, CPP_STRING);
2952   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2953
2954   /* Verify that cpp_interpret_string works.  */
2955   cpp_string dst_string;
2956   const enum cpp_ttype type = CPP_STRING;
2957   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2958                                       &dst_string, type);
2959   ASSERT_TRUE (result);
2960   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2961   free (const_cast <unsigned char *> (dst_string.text));
2962
2963   /* Verify ranges of individual characters.  This no longer includes the
2964      opening quote, but does include the closing quote.  */
2965   for (int i = 0; i < 5; i++)
2966     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2967   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2968   for (int i = 6; i <= 10; i++)
2969     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2970
2971   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2972 }
2973
2974 /* Test of string literal containing letter escapes.  */
2975
2976 static void
2977 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2978 {
2979   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2980      .....................000000000.1.11111.1.1.11222.22222223333333
2981      .....................123456789.0.12345.6.7.89012.34567890123456.  */
2982   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2983   lexer_test test (case_, content, NULL);
2984
2985   /* Verify that we get the expected tokens back.  */
2986   const cpp_token *tok = test.get_token ();
2987   ASSERT_EQ (tok->type, CPP_STRING);
2988   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2989
2990   /* Verify ranges of individual characters. */
2991   /* "\t".  */
2992   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2993                         0, 1, 10, 11);
2994   /* "foo". */
2995   for (int i = 1; i <= 3; i++)
2996     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2997                           i, 1, 11 + i, 11 + i);
2998   /* "\\" and "\n".  */
2999   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3000                         4, 1, 15, 16);
3001   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3002                         5, 1, 17, 18);
3003
3004   /* "bar" and closing quote for nul-terminator.  */
3005   for (int i = 6; i <= 9; i++)
3006     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3007                           i, 1, 13 + i, 13 + i);
3008
3009   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
3010 }
3011
3012 /* Another test of a string literal containing a letter escape.
3013    Based on string seen in
3014      printf ("%-%\n");
3015    in gcc.dg/format/c90-printf-1.c.  */
3016
3017 static void
3018 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
3019 {
3020   /* .....................000000000.1111.11.1111.22222222223.
3021      .....................123456789.0123.45.6789.01234567890.  */
3022   const char *content = ("        \"%-%\\n\" /* non-str */\n");
3023   lexer_test test (case_, content, NULL);
3024
3025   /* Verify that we get the expected tokens back.  */
3026   const cpp_token *tok = test.get_token ();
3027   ASSERT_EQ (tok->type, CPP_STRING);
3028   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
3029
3030   /* Verify ranges of individual characters. */
3031   /* "%-%".  */
3032   for (int i = 0; i < 3; i++)
3033     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3034                           i, 1, 10 + i, 10 + i);
3035   /* "\n".  */
3036   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3037                         3, 1, 13, 14);
3038
3039   /* Closing quote for nul-terminator.  */
3040   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3041                         4, 1, 15, 15);
3042
3043   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
3044 }
3045
3046 /* Lex a string literal containing UCN 4 characters.
3047    Verify the substring location data after running cpp_interpret_string
3048    on it.  */
3049
3050 static void
3051 test_lexer_string_locations_ucn4 (const line_table_case &case_)
3052 {
3053   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
3054      as UCN 4.
3055      ....................000000000.111111.111122.222222223.33333333344444
3056      ....................123456789.012345.678901.234567890.12345678901234  */
3057   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
3058   lexer_test test (case_, content, NULL);
3059
3060   /* Verify that we get the expected token back, with the correct
3061      location information.  */
3062   const cpp_token *tok = test.get_token ();
3063   ASSERT_EQ (tok->type, CPP_STRING);
3064   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
3065
3066   /* Verify that cpp_interpret_string works.
3067      The string should be encoded in the execution character
3068      set.  Assuming that is UTF-8, we should have the following:
3069      -----------  ----  -----  -------  ----------------
3070      Byte offset  Byte  Octal  Unicode  Source Column(s)
3071      -----------  ----  -----  -------  ----------------
3072      0            0x30         '0'      10
3073      1            0x31         '1'      11
3074      2            0x32         '2'      12
3075      3            0x33         '3'      13
3076      4            0x34         '4'      14
3077      5            0xE2  \342   U+2174   15-20
3078      6            0x85  \205    (cont)  15-20
3079      7            0xB4  \264    (cont)  15-20
3080      8            0xE2  \342   U+2175   21-26
3081      9            0x85  \205    (cont)  21-26
3082      10           0xB5  \265    (cont)  21-26
3083      11           0x37         '7'      27
3084      12           0x38         '8'      28
3085      13           0x39         '9'      29
3086      14           0x00                  30 (closing quote)
3087      -----------  ----  -----  -------  ---------------.  */
3088
3089   cpp_string dst_string;
3090   const enum cpp_ttype type = CPP_STRING;
3091   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3092                                       &dst_string, type);
3093   ASSERT_TRUE (result);
3094   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3095                 (const char *)dst_string.text);
3096   free (const_cast <unsigned char *> (dst_string.text));
3097
3098   /* Verify ranges of individual characters.  This no longer includes the
3099      opening quote, but does include the closing quote.
3100      '01234'.  */
3101   for (int i = 0; i <= 4; i++)
3102     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3103   /* U+2174.  */
3104   for (int i = 5; i <= 7; i++)
3105     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
3106   /* U+2175.  */
3107   for (int i = 8; i <= 10; i++)
3108     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
3109   /* '789' and nul terminator  */
3110   for (int i = 11; i <= 14; i++)
3111     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
3112
3113   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
3114 }
3115
3116 /* Lex a string literal containing UCN 8 characters.
3117    Verify the substring location data after running cpp_interpret_string
3118    on it.  */
3119
3120 static void
3121 test_lexer_string_locations_ucn8 (const line_table_case &case_)
3122 {
3123   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
3124      ....................000000000.111111.1111222222.2222333333333.344444
3125      ....................123456789.012345.6789012345.6789012345678.901234  */
3126   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
3127   lexer_test test (case_, content, NULL);
3128
3129   /* Verify that we get the expected token back, with the correct
3130      location information.  */
3131   const cpp_token *tok = test.get_token ();
3132   ASSERT_EQ (tok->type, CPP_STRING);
3133   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
3134                            "\"01234\\U00002174\\U00002175789\"");
3135
3136   /* Verify that cpp_interpret_string works.
3137      The UTF-8 encoding of the string is identical to that from
3138      the ucn4 testcase above; the only difference is the column
3139      locations.  */
3140   cpp_string dst_string;
3141   const enum cpp_ttype type = CPP_STRING;
3142   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3143                                       &dst_string, type);
3144   ASSERT_TRUE (result);
3145   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3146                 (const char *)dst_string.text);
3147   free (const_cast <unsigned char *> (dst_string.text));
3148
3149   /* Verify ranges of individual characters.  This no longer includes the
3150      opening quote, but does include the closing quote.
3151      '01234'.  */
3152   for (int i = 0; i <= 4; i++)
3153     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3154   /* U+2174.  */
3155   for (int i = 5; i <= 7; i++)
3156     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
3157   /* U+2175.  */
3158   for (int i = 8; i <= 10; i++)
3159     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
3160   /* '789' at columns 35-37  */
3161   for (int i = 11; i <= 13; i++)
3162     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
3163   /* Closing quote/nul-terminator at column 38.  */
3164   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
3165
3166   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
3167 }
3168
/* Read the 32-bit big-endian value stored at PTR_BE_VALUE and return it
   in host endianness.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *)ptr_be_value;

  /* Accumulate the four bytes, most significant first.  */
  uint32_t host_value = 0;
  for (int i = 0; i < 4; i++)
    host_value = (host_value << 8) | (uint32_t) bytes[i];
  return host_value;
}
3180
3181 /* Lex a wide string literal and verify that attempts to read substring
3182    location data from it fail gracefully.  */
3183
3184 static void
3185 test_lexer_string_locations_wide_string (const line_table_case &case_)
3186 {
3187   /* Digits 0-9.
3188      ....................000000000.11111111112.22222222233333
3189      ....................123456789.01234567890.12345678901234  */
3190   const char *content = "       L\"0123456789\" /* non-str */\n";
3191   lexer_test test (case_, content, NULL);
3192
3193   /* Verify that we get the expected token back, with the correct
3194      location information.  */
3195   const cpp_token *tok = test.get_token ();
3196   ASSERT_EQ (tok->type, CPP_WSTRING);
3197   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
3198
3199   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
3200   cpp_string dst_string;
3201   const enum cpp_ttype type = CPP_WSTRING;
3202   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3203                                       &dst_string, type);
3204   ASSERT_TRUE (result);
3205   /* The cpp_reader defaults to big-endian with
3206      CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
3207      now be encoded as UTF-32BE.  */
3208   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3209   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3210   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3211   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3212   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3213   free (const_cast <unsigned char *> (dst_string.text));
3214
3215   /* We don't yet support generating substring location information
3216      for L"" strings.  */
3217   ASSERT_HAS_NO_SUBSTRING_RANGES
3218     (test, tok->src_loc, type,
3219      "execution character set != source character set");
3220 }
3221
/* Read the 16-bit big-endian value stored at PTR_BE_VALUE and return it
   in host endianness.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *)ptr_be_value;

  /* The first byte is the most significant one.  */
  const unsigned int high = bytes[0];
  const unsigned int low = bytes[1];
  return (uint16_t) ((high << 8) | low);
}
3230
3231 /* Lex a u"" string literal and verify that attempts to read substring
3232    location data from it fail gracefully.  */
3233
3234 static void
3235 test_lexer_string_locations_string16 (const line_table_case &case_)
3236 {
3237   /* Digits 0-9.
3238      ....................000000000.11111111112.22222222233333
3239      ....................123456789.01234567890.12345678901234  */
3240   const char *content = "       u\"0123456789\" /* non-str */\n";
3241   lexer_test test (case_, content, NULL);
3242
3243   /* Verify that we get the expected token back, with the correct
3244      location information.  */
3245   const cpp_token *tok = test.get_token ();
3246   ASSERT_EQ (tok->type, CPP_STRING16);
3247   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
3248
3249   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
3250   cpp_string dst_string;
3251   const enum cpp_ttype type = CPP_STRING16;
3252   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3253                                       &dst_string, type);
3254   ASSERT_TRUE (result);
3255
3256   /* The cpp_reader defaults to big-endian, so dst_string should
3257      now be encoded as UTF-16BE.  */
3258   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
3259   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
3260   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
3261   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
3262   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
3263   free (const_cast <unsigned char *> (dst_string.text));
3264
3265   /* We don't yet support generating substring location information
3266      for L"" strings.  */
3267   ASSERT_HAS_NO_SUBSTRING_RANGES
3268     (test, tok->src_loc, type,
3269      "execution character set != source character set");
3270 }
3271
3272 /* Lex a U"" string literal and verify that attempts to read substring
3273    location data from it fail gracefully.  */
3274
3275 static void
3276 test_lexer_string_locations_string32 (const line_table_case &case_)
3277 {
3278   /* Digits 0-9.
3279      ....................000000000.11111111112.22222222233333
3280      ....................123456789.01234567890.12345678901234  */
3281   const char *content = "       U\"0123456789\" /* non-str */\n";
3282   lexer_test test (case_, content, NULL);
3283
3284   /* Verify that we get the expected token back, with the correct
3285      location information.  */
3286   const cpp_token *tok = test.get_token ();
3287   ASSERT_EQ (tok->type, CPP_STRING32);
3288   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
3289
3290   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
3291   cpp_string dst_string;
3292   const enum cpp_ttype type = CPP_STRING32;
3293   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3294                                       &dst_string, type);
3295   ASSERT_TRUE (result);
3296
3297   /* The cpp_reader defaults to big-endian, so dst_string should
3298      now be encoded as UTF-32BE.  */
3299   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3300   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3301   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3302   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3303   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3304   free (const_cast <unsigned char *> (dst_string.text));
3305
3306   /* We don't yet support generating substring location information
3307      for L"" strings.  */
3308   ASSERT_HAS_NO_SUBSTRING_RANGES
3309     (test, tok->src_loc, type,
3310      "execution character set != source character set");
3311 }
3312
3313 /* Lex a u8-string literal.
3314    Verify the substring location data after running cpp_interpret_string
3315    on it.  */
3316
3317 static void
3318 test_lexer_string_locations_u8 (const line_table_case &case_)
3319 {
3320   /* Digits 0-9.
3321      ....................000000000.11111111112.22222222233333
3322      ....................123456789.01234567890.12345678901234  */
3323   const char *content = "      u8\"0123456789\" /* non-str */\n";
3324   lexer_test test (case_, content, NULL);
3325
3326   /* Verify that we get the expected token back, with the correct
3327      location information.  */
3328   const cpp_token *tok = test.get_token ();
3329   ASSERT_EQ (tok->type, CPP_UTF8STRING);
3330   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
3331
3332   /* Verify that cpp_interpret_string works.  */
3333   cpp_string dst_string;
3334   const enum cpp_ttype type = CPP_STRING;
3335   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3336                                       &dst_string, type);
3337   ASSERT_TRUE (result);
3338   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3339   free (const_cast <unsigned char *> (dst_string.text));
3340
3341   /* Verify ranges of individual characters.  This no longer includes the
3342      opening quote, but does include the closing quote.  */
3343   for (int i = 0; i <= 10; i++)
3344     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3345 }
3346
3347 /* Lex a string literal containing UTF-8 source characters.
3348    Verify the substring location data after running cpp_interpret_string
3349    on it.  */
3350
3351 static void
3352 test_lexer_string_locations_utf8_source (const line_table_case &case_)
3353 {
3354  /* This string literal is written out to the source file as UTF-8,
3355     and is of the form "before mojibake after", where "mojibake"
3356     is written as the following four unicode code points:
3357        U+6587 CJK UNIFIED IDEOGRAPH-6587
3358        U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3359        U+5316 CJK UNIFIED IDEOGRAPH-5316
3360        U+3051 HIRAGANA LETTER KE.
3361      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3362      "before" and "after" are 1 byte per unicode character.
3363
3364      The numbering shown are "columns", which are *byte* numbers within
3365      the line, rather than unicode character numbers.
3366
3367      .................... 000000000.1111111.
3368      .................... 123456789.0123456.  */
3369   const char *content = ("        \"before "
3370                          /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3371                               UTF-8: 0xE6 0x96 0x87
3372                               C octal escaped UTF-8: \346\226\207
3373                             "column" numbers: 17-19.  */
3374                          "\346\226\207"
3375
3376                          /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3377                               UTF-8: 0xE5 0xAD 0x97
3378                               C octal escaped UTF-8: \345\255\227
3379                             "column" numbers: 20-22.  */
3380                          "\345\255\227"
3381
3382                          /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3383                               UTF-8: 0xE5 0x8C 0x96
3384                               C octal escaped UTF-8: \345\214\226
3385                             "column" numbers: 23-25.  */
3386                          "\345\214\226"
3387
3388                          /* U+3051 HIRAGANA LETTER KE
3389                               UTF-8: 0xE3 0x81 0x91
3390                               C octal escaped UTF-8: \343\201\221
3391                             "column" numbers: 26-28.  */
3392                          "\343\201\221"
3393
3394                          /* column numbers 29 onwards
3395                           2333333.33334444444444
3396                           9012345.67890123456789. */
3397                          " after\" /* non-str */\n");
3398   lexer_test test (case_, content, NULL);
3399
3400   /* Verify that we get the expected token back, with the correct
3401      location information.  */
3402   const cpp_token *tok = test.get_token ();
3403   ASSERT_EQ (tok->type, CPP_STRING);
3404   ASSERT_TOKEN_AS_TEXT_EQ
3405     (test.m_parser, tok,
3406      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3407
3408   /* Verify that cpp_interpret_string works.  */
3409   cpp_string dst_string;
3410   const enum cpp_ttype type = CPP_STRING;
3411   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3412                                       &dst_string, type);
3413   ASSERT_TRUE (result);
3414   ASSERT_STREQ
3415     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3416      (const char *)dst_string.text);
3417   free (const_cast <unsigned char *> (dst_string.text));
3418
3419   /* Verify ranges of individual characters.  This no longer includes the
3420      opening quote, but does include the closing quote.
3421      Assuming that both source and execution encodings are UTF-8, we have
3422      a run of 25 octets in each, plus the NUL terminator.  */
3423   for (int i = 0; i < 25; i++)
3424     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3425   /* NUL-terminator should use the closing quote at column 35.  */
3426   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3427
3428   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3429 }
3430
3431 /* Test of string literal concatenation.  */
3432
3433 static void
3434 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3435 {
3436   /* Digits 0-9.
3437      .....................000000000.111111.11112222222222
3438      .....................123456789.012345.67890123456789.  */
3439   const char *content = ("        \"01234\" /* non-str */\n"
3440                          "        \"56789\" /* non-str */\n");
3441   lexer_test test (case_, content, NULL);
3442
3443   location_t input_locs[2];
3444
3445   /* Verify that we get the expected tokens back.  */
3446   auto_vec <cpp_string> input_strings;
3447   const cpp_token *tok_a = test.get_token ();
3448   ASSERT_EQ (tok_a->type, CPP_STRING);
3449   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3450   input_strings.safe_push (tok_a->val.str);
3451   input_locs[0] = tok_a->src_loc;
3452
3453   const cpp_token *tok_b = test.get_token ();
3454   ASSERT_EQ (tok_b->type, CPP_STRING);
3455   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3456   input_strings.safe_push (tok_b->val.str);
3457   input_locs[1] = tok_b->src_loc;
3458
3459   /* Verify that cpp_interpret_string works.  */
3460   cpp_string dst_string;
3461   const enum cpp_ttype type = CPP_STRING;
3462   bool result = cpp_interpret_string (test.m_parser,
3463                                       input_strings.address (), 2,
3464                                       &dst_string, type);
3465   ASSERT_TRUE (result);
3466   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3467   free (const_cast <unsigned char *> (dst_string.text));
3468
3469   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
3470   test.m_concats.record_string_concatenation (2, input_locs);
3471
3472   location_t initial_loc = input_locs[0];
3473
3474   /* "01234" on line 1.  */
3475   for (int i = 0; i <= 4; i++)
3476     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3477   /* "56789" in line 2, plus its closing quote for the nul terminator.  */
3478   for (int i = 5; i <= 10; i++)
3479     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3480
3481   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3482 }
3483
3484 /* Another test of string literal concatenation.  */
3485
3486 static void
3487 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3488 {
3489   /* Digits 0-9.
3490      .....................000000000.111.11111112222222
3491      .....................123456789.012.34567890123456.  */
3492   const char *content = ("        \"01\" /* non-str */\n"
3493                          "        \"23\" /* non-str */\n"
3494                          "        \"45\" /* non-str */\n"
3495                          "        \"67\" /* non-str */\n"
3496                          "        \"89\" /* non-str */\n");
3497   lexer_test test (case_, content, NULL);
3498
3499   auto_vec <cpp_string> input_strings;
3500   location_t input_locs[5];
3501
3502   /* Verify that we get the expected tokens back.  */
3503   for (int i = 0; i < 5; i++)
3504     {
3505       const cpp_token *tok = test.get_token ();
3506       ASSERT_EQ (tok->type, CPP_STRING);
3507       input_strings.safe_push (tok->val.str);
3508       input_locs[i] = tok->src_loc;
3509     }
3510
3511   /* Verify that cpp_interpret_string works.  */
3512   cpp_string dst_string;
3513   const enum cpp_ttype type = CPP_STRING;
3514   bool result = cpp_interpret_string (test.m_parser,
3515                                       input_strings.address (), 5,
3516                                       &dst_string, type);
3517   ASSERT_TRUE (result);
3518   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3519   free (const_cast <unsigned char *> (dst_string.text));
3520
3521   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
3522   test.m_concats.record_string_concatenation (5, input_locs);
3523
3524   location_t initial_loc = input_locs[0];
3525
3526   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3527      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3528      and expect get_source_range_for_substring to fail.
3529      However, for a string concatenation test, we can have a case
3530      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3531      but subsequent strings can be after it.
3532      Attempting to detect this within assert_char_at_range
3533      would overcomplicate the logic for the common test cases, so
3534      we detect it here.  */
3535   if (should_have_column_data_p (input_locs[0])
3536       && !should_have_column_data_p (input_locs[4]))
3537     {
3538       /* Verify that get_source_range_for_substring gracefully rejects
3539          this case.  */
3540       source_range actual_range;
3541       const char *err
3542         = get_source_range_for_char (test.m_parser, &test.m_concats,
3543                                      initial_loc, type, 0, &actual_range);
3544       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3545       return;
3546     }
3547
3548   for (int i = 0; i < 5; i++)
3549     for (int j = 0; j < 2; j++)
3550       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3551                             i + 1, 10 + j, 10 + j);
3552
3553   /* NUL-terminator should use the final closing quote at line 5 column 12.  */
3554   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3555
3556   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3557 }
3558
3559 /* Another test of string literal concatenation, this time combined with
3560    various kinds of escaped characters.  */
3561
3562 static void
3563 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3564 {
3565   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3566      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
3567   const char *content
3568     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3569        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3570     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
3571   lexer_test test (case_, content, NULL);
3572
3573   auto_vec <cpp_string> input_strings;
3574   location_t input_locs[4];
3575
3576   /* Verify that we get the expected tokens back.  */
3577   for (int i = 0; i < 4; i++)
3578     {
3579       const cpp_token *tok = test.get_token ();
3580       ASSERT_EQ (tok->type, CPP_STRING);
3581       input_strings.safe_push (tok->val.str);
3582       input_locs[i] = tok->src_loc;
3583     }
3584
3585   /* Verify that cpp_interpret_string works.  */
3586   cpp_string dst_string;
3587   const enum cpp_ttype type = CPP_STRING;
3588   bool result = cpp_interpret_string (test.m_parser,
3589                                       input_strings.address (), 4,
3590                                       &dst_string, type);
3591   ASSERT_TRUE (result);
3592   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3593   free (const_cast <unsigned char *> (dst_string.text));
3594
3595   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
3596   test.m_concats.record_string_concatenation (4, input_locs);
3597
3598   location_t initial_loc = input_locs[0];
3599
3600   for (int i = 0; i <= 4; i++)
3601     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3602   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3603   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3604   for (int i = 7; i <= 9; i++)
3605     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3606
3607   /* NUL-terminator should use the location of the final closing quote.  */
3608   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3609
3610   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3611 }
3612
3613 /* Test of string literal in a macro.  */
3614
3615 static void
3616 test_lexer_string_locations_macro (const line_table_case &case_)
3617 {
3618   /* Digits 0-9.
3619      .....................0000000001111111111.22222222223.
3620      .....................1234567890123456789.01234567890.  */
3621   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
3622                          "  MACRO");
3623   lexer_test test (case_, content, NULL);
3624
3625   /* Verify that we get the expected tokens back.  */
3626   const cpp_token *tok = test.get_token ();
3627   ASSERT_EQ (tok->type, CPP_PADDING);
3628
3629   tok = test.get_token ();
3630   ASSERT_EQ (tok->type, CPP_STRING);
3631   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3632
3633   /* Verify ranges of individual characters.  We ought to
3634      see columns within the macro definition.  */
3635   for (int i = 0; i <= 10; i++)
3636     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3637                           i, 1, 20 + i, 20 + i);
3638
3639   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3640
3641   tok = test.get_token ();
3642   ASSERT_EQ (tok->type, CPP_PADDING);
3643 }
3644
3645 /* Test of stringification of a macro argument.  */
3646
3647 static void
3648 test_lexer_string_locations_stringified_macro_argument
3649   (const line_table_case &case_)
3650 {
3651   /* .....................000000000111111111122222222223.
3652      .....................123456789012345678901234567890.  */
3653   const char *content = ("#define MACRO(X) #X /* non-str */\n"
3654                          "MACRO(foo)\n");
3655   lexer_test test (case_, content, NULL);
3656
3657   /* Verify that we get the expected token back.  */
3658   const cpp_token *tok = test.get_token ();
3659   ASSERT_EQ (tok->type, CPP_PADDING);
3660
3661   tok = test.get_token ();
3662   ASSERT_EQ (tok->type, CPP_STRING);
3663   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3664
3665   /* We don't support getting the location of a stringified macro
3666      argument.  Verify that it fails gracefully.  */
3667   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3668                                   "cpp_interpret_string_1 failed");
3669
3670   tok = test.get_token ();
3671   ASSERT_EQ (tok->type, CPP_PADDING);
3672
3673   tok = test.get_token ();
3674   ASSERT_EQ (tok->type, CPP_PADDING);
3675 }
3676
3677 /* Ensure that we are fail gracefully if something attempts to pass
3678    in a location that isn't a string literal token.  Seen on this code:
3679
3680      const char a[] = " %d ";
3681      __builtin_printf (a, 0.5);
3682                        ^
3683
3684    when c-format.cc erroneously used the indicated one-character
3685    location as the format string location, leading to a read past the
3686    end of a string buffer in cpp_interpret_string_1.  */
3687
3688 static void
3689 test_lexer_string_locations_non_string (const line_table_case &case_)
3690 {
3691   /* .....................000000000111111111122222222223.
3692      .....................123456789012345678901234567890.  */
3693   const char *content = ("         a\n");
3694   lexer_test test (case_, content, NULL);
3695
3696   /* Verify that we get the expected token back.  */
3697   const cpp_token *tok = test.get_token ();
3698   ASSERT_EQ (tok->type, CPP_NAME);
3699   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3700
3701   /* At this point, libcpp is attempting to interpret the name as a
3702      string literal, despite it not starting with a quote.  We don't detect
3703      that, but we should at least fail gracefully.  */
3704   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3705                                   "cpp_interpret_string_1 failed");
3706 }
3707
3708 /* Ensure that we can read substring information for a token which
3709    starts in one linemap and ends in another .  Adapted from
3710    gcc.dg/cpp/pr69985.c.  */
3711
3712 static void
3713 test_lexer_string_locations_long_line (const line_table_case &case_)
3714 {
3715   /* .....................000000.000111111111
3716      .....................123456.789012346789.  */
3717   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
3718                          "     \"0123456789012345678901234567890123456789"
3719                          "0123456789012345678901234567890123456789"
3720                          "0123456789012345678901234567890123456789"
3721                          "0123456789\"\n");
3722
3723   lexer_test test (case_, content, NULL);
3724
3725   /* Verify that we get the expected token back.  */
3726   const cpp_token *tok = test.get_token ();
3727   ASSERT_EQ (tok->type, CPP_STRING);
3728
3729   if (!should_have_column_data_p (line_table->highest_location))
3730     return;
3731
3732   /* Verify ranges of individual characters.  */
3733   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3734   for (int i = 0; i < 131; i++)
3735     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3736                           i, 2, 7 + i, 7 + i);
3737 }
3738
3739 /* Test of locations within a raw string that doesn't contain a newline.  */
3740
3741 static void
3742 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3743 {
3744   /* .....................00.0000000111111111122.
3745      .....................12.3456789012345678901.  */
3746   const char *content = ("R\"foo(0123456789)foo\"\n");
3747   lexer_test test (case_, content, NULL);
3748
3749   /* Verify that we get the expected token back.  */
3750   const cpp_token *tok = test.get_token ();
3751   ASSERT_EQ (tok->type, CPP_STRING);
3752
3753   /* Verify that cpp_interpret_string works.  */
3754   cpp_string dst_string;
3755   const enum cpp_ttype type = CPP_STRING;
3756   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3757                                       &dst_string, type);
3758   ASSERT_TRUE (result);
3759   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3760   free (const_cast <unsigned char *> (dst_string.text));
3761
3762   if (!should_have_column_data_p (line_table->highest_location))
3763     return;
3764
3765   /* 0-9, plus the nil terminator.  */
3766   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3767   for (int i = 0; i < 11; i++)
3768     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3769                           i, 1, 7 + i, 7 + i);
3770 }
3771
3772 /* Test of locations within a raw string that contains a newline.  */
3773
3774 static void
3775 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3776 {
3777   /* .....................00.0000.
3778      .....................12.3456.  */
3779   const char *content = ("R\"foo(\n"
3780   /* .....................00000.
3781      .....................12345.  */
3782                          "hello\n"
3783                          "world\n"
3784   /* .....................00000.
3785      .....................12345.  */
3786                          ")foo\"\n");
3787   lexer_test test (case_, content, NULL);
3788
3789   /* Verify that we get the expected token back.  */
3790   const cpp_token *tok = test.get_token ();
3791   ASSERT_EQ (tok->type, CPP_STRING);
3792
3793   /* Verify that cpp_interpret_string works.  */
3794   cpp_string dst_string;
3795   const enum cpp_ttype type = CPP_STRING;
3796   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3797                                       &dst_string, type);
3798   ASSERT_TRUE (result);
3799   ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3800   free (const_cast <unsigned char *> (dst_string.text));
3801
3802   if (!should_have_column_data_p (line_table->highest_location))
3803     return;
3804
3805   /* Currently we don't support locations within raw strings that
3806      contain newlines.  */
3807   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3808                                   "range endpoints are on different lines");
3809 }
3810
3811 /* Test of parsing an unterminated raw string.  */
3812
3813 static void
3814 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3815 {
3816   const char *content = "R\"ouch()ouCh\" /* etc */";
3817
3818   lexer_diagnostic_sink diagnostics;
3819   lexer_test test (case_, content, &diagnostics);
3820   test.m_implicitly_expect_EOF = false;
3821
3822   /* Attempt to parse the raw string.  */
3823   const cpp_token *tok = test.get_token ();
3824   ASSERT_EQ (tok->type, CPP_EOF);
3825
3826   ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3827   /* We expect the message "unterminated raw string"
3828      in the "cpplib" translation domain.
3829      It's not clear that dgettext is available on all supported hosts,
3830      so this assertion is commented-out for now.
3831        ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3832                      diagnostics.m_diagnostics[0]);
3833   */
3834 }
3835
3836 /* Test of lexing char constants.  */
3837
3838 static void
3839 test_lexer_char_constants (const line_table_case &case_)
3840 {
3841   /* Various char constants.
3842      .....................0000000001111111111.22222222223.
3843      .....................1234567890123456789.01234567890.  */
3844   const char *content = ("         'a'\n"
3845                          "        u'a'\n"
3846                          "        U'a'\n"
3847                          "        L'a'\n"
3848                          "         'abc'\n");
3849   lexer_test test (case_, content, NULL);
3850
3851   /* Verify that we get the expected tokens back.  */
3852   /* 'a'.  */
3853   const cpp_token *tok = test.get_token ();
3854   ASSERT_EQ (tok->type, CPP_CHAR);
3855   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3856
3857   unsigned int chars_seen;
3858   int unsignedp;
3859   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3860                                           &chars_seen, &unsignedp);
3861   ASSERT_EQ (cc, 'a');
3862   ASSERT_EQ (chars_seen, 1);
3863
3864   /* u'a'.  */
3865   tok = test.get_token ();
3866   ASSERT_EQ (tok->type, CPP_CHAR16);
3867   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3868
3869   /* U'a'.  */
3870   tok = test.get_token ();
3871   ASSERT_EQ (tok->type, CPP_CHAR32);
3872   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3873
3874   /* L'a'.  */
3875   tok = test.get_token ();
3876   ASSERT_EQ (tok->type, CPP_WCHAR);
3877   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3878
3879   /* 'abc' (c-char-sequence).  */
3880   tok = test.get_token ();
3881   ASSERT_EQ (tok->type, CPP_CHAR);
3882   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3883 }
3884 /* A table of interesting location_t values, giving one axis of our test
3885    matrix.  */
3886
3887 static const location_t boundary_locations[] = {
3888   /* Zero means "don't override the default values for a new line_table".  */
3889   0,
3890
3891   /* An arbitrary non-zero value that isn't close to one of
3892      the boundary values below.  */
3893   0x10000,
3894
3895   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
3896   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3897   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3898   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3899   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3900   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3901
3902   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
3903   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3904   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3905   LINE_MAP_MAX_LOCATION_WITH_COLS,
3906   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3907   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3908 };
3909
3910 /* Run TESTCASE multiple times, once for each case in our test matrix.  */
3911
3912 void
3913 for_each_line_table_case (void (*testcase) (const line_table_case &))
3914 {
3915   /* As noted above in the description of struct line_table_case,
3916      we want to explore a test matrix of interesting line_table
3917      situations, running various selftests for each case within the
3918      matrix.  */
3919
3920   /* Run all tests with:
3921      (a) line_table->default_range_bits == 0, and
3922      (b) line_table->default_range_bits == 5.  */
3923   int num_cases_tested = 0;
3924   for (int default_range_bits = 0; default_range_bits <= 5;
3925        default_range_bits += 5)
3926     {
3927       /* ...and use each of the "interesting" location values as
3928          the starting location within line_table.  */
3929       const int num_boundary_locations = ARRAY_SIZE (boundary_locations);
3930       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3931         {
3932           line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3933
3934           testcase (c);
3935
3936           num_cases_tested++;
3937         }
3938     }
3939
3940   /* Verify that we fully covered the test matrix.  */
3941   ASSERT_EQ (num_cases_tested, 2 * 12);
3942 }
3943
3944 /* Verify that when presented with a consecutive pair of locations with
3945    a very large line offset, we don't attempt to consolidate them into
3946    a single ordinary linemap where the line offsets within the line map
3947    would lead to overflow (PR lto/88147).  */
3948
3949 static void
3950 test_line_offset_overflow ()
3951 {
3952   line_table_test ltt (line_table_case (5, 0));
3953
3954   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3955   linemap_line_start (line_table, 1, 100);
3956   location_t loc_a = linemap_line_start (line_table, 2578, 255);
3957   assert_loceq ("foo.c", 2578, 0, loc_a);
3958
3959   const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3960   ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3961   ASSERT_EQ (ordmap_a->m_range_bits, 5);
3962
3963   location_t loc_b = linemap_line_start (line_table, 404198, 512);
3964   assert_loceq ("foo.c", 404198, 0, loc_b);
3965
3966   /* We should have started a new linemap, rather than attempting to store
3967      a very large line offset.  */
3968   const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3969   ASSERT_NE (ordmap_a, ordmap_b);
3970 }
3971
3972 void test_cpp_utf8 ()
3973 {
3974   const int def_tabstop = 8;
3975   cpp_char_column_policy policy (def_tabstop, cpp_wcwidth);
3976
3977   /* Verify that wcwidth of invalid UTF-8 or control bytes is 1.  */
3978   {
3979     int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, policy);
3980     ASSERT_EQ (8, w_bad);
3981     int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, policy);
3982     ASSERT_EQ (5, w_ctrl);
3983   }
3984
3985   /* Verify that wcwidth of valid UTF-8 is as expected.  */
3986   {
3987     const int w_pi = cpp_display_width ("\xcf\x80", 2, policy);
3988     ASSERT_EQ (1, w_pi);
3989     const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, policy);
3990     ASSERT_EQ (2, w_emoji);
3991     const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
3992                                                         policy);
3993     ASSERT_EQ (1, w_umlaut_precomposed);
3994     const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
3995                                                       policy);
3996     ASSERT_EQ (1, w_umlaut_combining);
3997     const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, policy);
3998     ASSERT_EQ (2, w_han);
3999     const int w_ascii = cpp_display_width ("GCC", 3, policy);
4000     ASSERT_EQ (3, w_ascii);
4001     const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
4002                                            "\x9f! \xe4\xb8\xba y\xcc\x88",
4003                                            24, policy);
4004     ASSERT_EQ (18, w_mixed);
4005   }
4006
4007   /* Verify that display width properly expands tabs.  */
4008   {
4009     const char *tstr = "\tabc\td";
4010     ASSERT_EQ (6, cpp_display_width (tstr, 6,
4011                                      cpp_char_column_policy (1, cpp_wcwidth)));
4012     ASSERT_EQ (10, cpp_display_width (tstr, 6,
4013                                       cpp_char_column_policy (3, cpp_wcwidth)));
4014     ASSERT_EQ (17, cpp_display_width (tstr, 6,
4015                                       cpp_char_column_policy (8, cpp_wcwidth)));
4016     ASSERT_EQ (1,
4017                cpp_display_column_to_byte_column
4018                  (tstr, 6, 7, cpp_char_column_policy (8, cpp_wcwidth)));
4019   }
4020
4021   /* Verify that cpp_byte_column_to_display_column can go past the end,
4022      and similar edge cases.  */
4023   {
4024     const char *str
4025       /* Display columns.
4026          111111112345  */
4027       = "\xcf\x80 abc";
4028       /* 111122223456
4029          Byte columns.  */
4030
4031     ASSERT_EQ (5, cpp_display_width (str, 6, policy));
4032     ASSERT_EQ (105,
4033                cpp_byte_column_to_display_column (str, 6, 106, policy));
4034     ASSERT_EQ (10000,
4035                cpp_byte_column_to_display_column (NULL, 0, 10000, policy));
4036     ASSERT_EQ (0,
4037                cpp_byte_column_to_display_column (NULL, 10000, 0, policy));
4038   }
4039
4040   /* Verify that cpp_display_column_to_byte_column can go past the end,
4041      and similar edge cases, and check invertibility.  */
4042   {
4043     const char *str
4044       /* Display columns.
4045          000000000000000000000000000000000000011
4046          111111112222222234444444455555555678901  */
4047       = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
4048       /* 000000000000000000000000000000000111111
4049          111122223333444456666777788889999012345
4050          Byte columns.  */
4051     ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, policy));
4052     ASSERT_EQ (15,
4053                cpp_display_column_to_byte_column (str, 15, 11, policy));
4054     ASSERT_EQ (115,
4055                cpp_display_column_to_byte_column (str, 15, 111, policy));
4056     ASSERT_EQ (10000,
4057                cpp_display_column_to_byte_column (NULL, 0, 10000, policy));
4058     ASSERT_EQ (0,
4059                cpp_display_column_to_byte_column (NULL, 10000, 0, policy));
4060
4061     /* Verify that we do not interrupt a UTF-8 sequence.  */
4062     ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, policy));
4063
4064     for (int byte_col = 1; byte_col <= 15; ++byte_col)
4065       {
4066         const int disp_col
4067           = cpp_byte_column_to_display_column (str, 15, byte_col, policy);
4068         const int byte_col2
4069           = cpp_display_column_to_byte_column (str, 15, disp_col, policy);
4070
4071         /* If we ask for the display column in the middle of a UTF-8
4072            sequence, it will return the length of the partial sequence,
4073            matching the behavior of GCC before display column support.
4074            Otherwise check the round trip was successful.  */
4075         if (byte_col < 4)
4076           ASSERT_EQ (byte_col, disp_col);
4077         else if (byte_col >= 6 && byte_col < 9)
4078           ASSERT_EQ (3 + (byte_col - 5), disp_col);
4079         else
4080           ASSERT_EQ (byte_col2, byte_col);
4081       }
4082   }
4083 }
4084
4085 static bool
4086 check_cpp_valid_utf8_p (const char *str)
4087 {
4088   return cpp_valid_utf8_p (str, strlen (str));
4089 }
4090
4091 /* Check that cpp_valid_utf8_p works as expected.  */
4092
4093 static void
4094 test_cpp_valid_utf8_p ()
4095 {
4096   ASSERT_TRUE (check_cpp_valid_utf8_p ("hello world"));
4097
4098   /* 2-byte char (pi).  */
4099   ASSERT_TRUE (check_cpp_valid_utf8_p("\xcf\x80"));
4100
4101   /* 3-byte chars (the Japanese word "mojibake").  */
4102   ASSERT_TRUE (check_cpp_valid_utf8_p
4103                (
4104                 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
4105                    UTF-8: 0xE6 0x96 0x87
4106                    C octal escaped UTF-8: \346\226\207.  */
4107                 "\346\226\207"
4108                 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
4109                    UTF-8: 0xE5 0xAD 0x97
4110                    C octal escaped UTF-8: \345\255\227.  */
4111                 "\345\255\227"
4112                 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
4113                    UTF-8: 0xE5 0x8C 0x96
4114                    C octal escaped UTF-8: \345\214\226.  */
4115                 "\345\214\226"
4116                 /* U+3051 HIRAGANA LETTER KE
4117                    UTF-8: 0xE3 0x81 0x91
4118                    C octal escaped UTF-8: \343\201\221.  */
4119                 "\343\201\221"));
4120
4121   /* 4-byte char: an emoji.  */
4122   ASSERT_TRUE (check_cpp_valid_utf8_p ("\xf0\x9f\x98\x82"));
4123
4124   /* Control codes, including the NUL byte.  */
4125   ASSERT_TRUE (cpp_valid_utf8_p ("\r\n\v\0\1", 5));
4126
4127   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xf0!\x9f!\x98!\x82!"));
4128
4129   /* Unexpected continuation bytes.  */
4130   for (unsigned char continuation_byte = 0x80;
4131        continuation_byte <= 0xbf;
4132        continuation_byte++)
4133     ASSERT_FALSE (cpp_valid_utf8_p ((const char *)&continuation_byte, 1));
4134
4135   /* "Lonely start characters" for 2-byte sequences.  */
4136   {
4137     unsigned char buf[2];
4138     buf[1] = ' ';
4139     for (buf[0] = 0xc0;
4140          buf[0] <= 0xdf;
4141          buf[0]++)
4142       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4143   }
4144
4145   /* "Lonely start characters" for 3-byte sequences.  */
4146   {
4147     unsigned char buf[2];
4148     buf[1] = ' ';
4149     for (buf[0] = 0xe0;
4150          buf[0] <= 0xef;
4151          buf[0]++)
4152       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4153   }
4154
4155   /* "Lonely start characters" for 4-byte sequences.  */
4156   {
4157     unsigned char buf[2];
4158     buf[1] = ' ';
4159     for (buf[0] = 0xf0;
4160          buf[0] <= 0xf4;
4161          buf[0]++)
4162       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4163   }
4164
4165   /* Invalid start characters (formerly valid for 5-byte and 6-byte
4166      sequences).  */
4167   {
4168     unsigned char buf[2];
4169     buf[1] = ' ';
4170     for (buf[0] = 0xf5;
4171          buf[0] <= 0xfd;
4172          buf[0]++)
4173       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4174   }
4175
4176   /* Impossible bytes.  */
4177   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc0"));
4178   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc1"));
4179   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xfe"));
4180   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xff"));
4181 }
4182
4183 /* Run all of the selftests within this file.  */
4184
4185 void
4186 input_cc_tests ()
4187 {
4188   test_linenum_comparisons ();
4189   test_should_have_column_data_p ();
4190   test_unknown_location ();
4191   test_builtins ();
4192   for_each_line_table_case (test_make_location_nonpure_range_endpoints);
4193
4194   for_each_line_table_case (test_accessing_ordinary_linemaps);
4195   for_each_line_table_case (test_lexer);
4196   for_each_line_table_case (test_lexer_string_locations_simple);
4197   for_each_line_table_case (test_lexer_string_locations_ebcdic);
4198   for_each_line_table_case (test_lexer_string_locations_hex);
4199   for_each_line_table_case (test_lexer_string_locations_oct);
4200   for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
4201   for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
4202   for_each_line_table_case (test_lexer_string_locations_ucn4);
4203   for_each_line_table_case (test_lexer_string_locations_ucn8);
4204   for_each_line_table_case (test_lexer_string_locations_wide_string);
4205   for_each_line_table_case (test_lexer_string_locations_string16);
4206   for_each_line_table_case (test_lexer_string_locations_string32);
4207   for_each_line_table_case (test_lexer_string_locations_u8);
4208   for_each_line_table_case (test_lexer_string_locations_utf8_source);
4209   for_each_line_table_case (test_lexer_string_locations_concatenation_1);
4210   for_each_line_table_case (test_lexer_string_locations_concatenation_2);
4211   for_each_line_table_case (test_lexer_string_locations_concatenation_3);
4212   for_each_line_table_case (test_lexer_string_locations_macro);
4213   for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
4214   for_each_line_table_case (test_lexer_string_locations_non_string);
4215   for_each_line_table_case (test_lexer_string_locations_long_line);
4216   for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
4217   for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
4218   for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
4219   for_each_line_table_case (test_lexer_char_constants);
4220
4221   test_reading_source_line ();
4222
4223   test_line_offset_overflow ();
4224
4225   test_cpp_utf8 ();
4226   test_cpp_valid_utf8_p ();
4227 }
4228
4229 } // namespace selftest
4230
4231 #endif /* CHECKING_P */