gcc/input.cc

   1 /* Data and functions related to line maps and input files.
   2    Copyright (C) 2004-2024 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "intl.h"
  24 #include "diagnostic.h"
  25 #include "selftest.h"
  26 #include "cpplib.h"
  27
  28 #ifndef HAVE_ICONV
  29 #define HAVE_ICONV 0
  30 #endif
  31
  32 const char *
  33 special_fname_builtin ()
  34 {
  35   return _("<built-in>");
  36 }
  37
  38 /* Input charset configuration.  */
  39 static const char *default_charset_callback (const char *)
  40 {
  41   return nullptr;
  42 }
  43
  44 void
  45 file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
  46                                       bool should_skip_bom)
  47 {
  48   in_context.ccb = (ccb ? ccb : default_charset_callback);
  49   in_context.should_skip_bom = should_skip_bom;
  50 }
  51
  52 /* This is a cache used by get_next_line to store the content of a
  53    file to be searched for file lines.  */
  54 class file_cache_slot
  55 {
  56 public:
  57   file_cache_slot ();
  58   ~file_cache_slot ();
  59
  60   bool read_line_num (size_t line_num,
  61                       char ** line, ssize_t *line_len);
  62
  63   /* Accessors.  */
  64   const char *get_file_path () const { return m_file_path; }
  65   unsigned get_use_count () const { return m_use_count; }
  66   bool missing_trailing_newline_p () const
  67   {
  68     return m_missing_trailing_newline;
  69   }
  70   char_span get_full_file_content ();
  71
  72   void inc_use_count () { m_use_count++; }
  73
  74   bool create (const file_cache::input_context &in_context,
  75                const char *file_path, FILE *fp, unsigned highest_use_count);
  76   void evict ();
  77
  78  private:
  79   /* These are information used to store a line boundary.  */
  80   class line_info
  81   {
  82   public:
  83     /* The line number.  It starts from 1.  */
  84     size_t line_num;
  85
  86     /* The position (byte count) of the beginning of the line,
  87        relative to the file data pointer.  This starts at zero.  */
  88     size_t start_pos;
  89
  90     /* The position (byte count) of the last byte of the line.  This
  91        normally points to the '\n' character, or to one byte after the
  92        last byte of the file, if the file doesn't contain a '\n'
  93        character.  */
  94     size_t end_pos;
  95
  96     line_info (size_t l, size_t s, size_t e)
  97       : line_num (l), start_pos (s), end_pos (e)
  98     {}
  99
 100     line_info ()
 101       :line_num (0), start_pos (0), end_pos (0)
 102     {}
 103   };
 104
 105   bool needs_read_p () const;
 106   bool needs_grow_p () const;
 107   void maybe_grow ();
 108   bool read_data ();
 109   bool maybe_read_data ();
 110   bool get_next_line (char **line, ssize_t *line_len);
 111   bool read_next_line (char ** line, ssize_t *line_len);
 112   bool goto_next_line ();
 113
 114   static const size_t buffer_size = 4 * 1024;
 115   static const size_t line_record_size = 100;
 116
 117   /* The number of time this file has been accessed.  This is used
 118      to designate which file cache to evict from the cache
 119      array.  */
 120   unsigned m_use_count;
 121
 122   /* The file_path is the key for identifying a particular file in
 123      the cache.
 124      For libcpp-using code, the underlying buffer for this field is
 125      owned by the corresponding _cpp_file within the cpp_reader.  */
 126   const char *m_file_path;
 127
 128   FILE *m_fp;
 129
 130   /* This points to the content of the file that we've read so
 131      far.  */
 132   char *m_data;
 133
 134   /* The allocated buffer to be freed may start a little earlier than DATA,
 135      e.g. if a UTF8 BOM was skipped at the beginning.  */
 136   int m_alloc_offset;
 137
 138   /*  The size of the DATA array above.*/
 139   size_t m_size;
 140
 141   /* The number of bytes read from the underlying file so far.  This
 142      must be less (or equal) than SIZE above.  */
 143   size_t m_nb_read;
 144
 145   /* The index of the beginning of the current line.  */
 146   size_t m_line_start_idx;
 147
 148   /* The number of the previous line read.  This starts at 1.  Zero
 149      means we've read no line so far.  */
 150   size_t m_line_num;
 151
 152   /* This is the total number of lines of the current file.  At the
 153      moment, we try to get this information from the line map
 154      subsystem.  Note that this is just a hint.  When using the C++
 155      front-end, this hint is correct because the input file is then
 156      completely tokenized before parsing starts; so the line map knows
 157      the number of lines before compilation really starts.  For e.g,
 158      the C front-end, it can happen that we start emitting diagnostics
 159      before the line map has seen the end of the file.  */
 160   size_t m_total_lines;
 161
 162   /* Could this file be missing a trailing newline on its final line?
 163      Initially true (to cope with empty files), set to true/false
 164      as each line is read.  */
 165   bool m_missing_trailing_newline;
 166
 167   /* This is a record of the beginning and end of the lines we've seen
 168      while reading the file.  This is useful to avoid walking the data
 169      from the beginning when we are asked to read a line that is
 170      before LINE_START_IDX above.  Note that the maximum size of this
 171      record is line_record_size, so that the memory consumption
 172      doesn't explode.  We thus scale total_lines down to
 173      line_record_size.  */
 174   vec<line_info, va_heap> m_line_record;
 175
 176   void offset_buffer (int offset)
 177   {
 178     gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
 179                 : (size_t) offset <= m_size);
 180     gcc_assert (m_data);
 181     m_alloc_offset += offset;
 182     m_data += offset;
 183     m_size -= offset;
 184   }
 185
 186 };
 187
/* Current position in real source file.  */

location_t input_location = UNKNOWN_LOCATION;

/* The line map set consulted by the code in this file, e.g. by
   total_lines_num to find the highest location seen for a file.  */

class line_maps *line_table;

/* A stashed copy of "line_table" for use by selftest::line_table_test.
   This needs to be a global so that it can be a GC root, and thus
   prevent the stashed copy from being garbage-collected if the GC runs
   during a line_table_test.  */

class line_maps *saved_line_table;
 200
 201 /* Expand the source location LOC into a human readable location.  If
 202    LOC resolves to a builtin location, the file name of the readable
 203    location is set to the string "<built-in>". If EXPANSION_POINT_P is
 204    TRUE and LOC is virtual, then it is resolved to the expansion
 205    point of the involved macro.  Otherwise, it is resolved to the
 206    spelling location of the token.
 207
 208    When resolving to the spelling location of the token, if the
 209    resulting location is for a built-in location (that is, it has no
 210    associated line/column) in the context of a macro expansion, the
 211    returned location is the first one (while unwinding the macro
 212    location towards its expansion point) that is in real source
 213    code.
 214
 215    ASPECT controls which part of the location to use.  */
 216
 217 static expanded_location
 218 expand_location_1 (const line_maps *set,
 219                    location_t loc,
 220                    bool expansion_point_p,
 221                    enum location_aspect aspect)
 222 {
 223   expanded_location xloc;
 224   const line_map_ordinary *map;
 225   enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
 226   tree block = NULL;
 227
 228   if (IS_ADHOC_LOC (loc))
 229     {
 230       block = LOCATION_BLOCK (loc);
 231       loc = LOCATION_LOCUS (loc);
 232     }
 233
 234   memset (&xloc, 0, sizeof (xloc));
 235
 236   if (loc >= RESERVED_LOCATION_COUNT)
 237     {
 238       if (!expansion_point_p)
 239         {
 240           /* We want to resolve LOC to its spelling location.
 241
 242              But if that spelling location is a reserved location that
 243              appears in the context of a macro expansion (like for a
 244              location for a built-in token), let's consider the first
 245              location (toward the expansion point) that is not reserved;
 246              that is, the first location that is in real source code.  */
 247           loc = linemap_unwind_to_first_non_reserved_loc (set,
 248                                                           loc, NULL);
 249           lrk = LRK_SPELLING_LOCATION;
 250         }
 251       loc = linemap_resolve_location (set, loc, lrk, &map);
 252
 253       /* loc is now either in an ordinary map, or is a reserved location.
 254          If it is a compound location, the caret is in a spelling location,
 255          but the start/finish might still be a virtual location.
 256          Depending of what the caller asked for, we may need to recurse
 257          one level in order to resolve any virtual locations in the
 258          end-points.  */
 259       switch (aspect)
 260         {
 261         default:
 262           gcc_unreachable ();
 263           /* Fall through.  */
 264         case LOCATION_ASPECT_CARET:
 265           break;
 266         case LOCATION_ASPECT_START:
 267           {
 268             location_t start = get_start (loc);
 269             if (start != loc)
 270               return expand_location_1 (set, start, expansion_point_p, aspect);
 271           }
 272           break;
 273         case LOCATION_ASPECT_FINISH:
 274           {
 275             location_t finish = get_finish (loc);
 276             if (finish != loc)
 277               return expand_location_1 (set, finish, expansion_point_p, aspect);
 278           }
 279           break;
 280         }
 281       xloc = linemap_expand_location (set, map, loc);
 282     }
 283
 284   xloc.data = block;
 285   if (loc <= BUILTINS_LOCATION)
 286     xloc.file = loc == UNKNOWN_LOCATION ? NULL : special_fname_builtin ();
 287
 288   return xloc;
 289 }
 290
 291 /* Return the total lines number that have been read so far by the
 292    line map (in the preprocessor) so far.  For languages like C++ that
 293    entirely preprocess the input file before starting to parse, this
 294    equals the actual number of lines of the file.  */
 295
 296 static size_t
 297 total_lines_num (const char *file_path)
 298 {
 299   size_t r = 0;
 300   location_t l = 0;
 301   if (linemap_get_file_highest_location (line_table, file_path, &l))
 302     {
 303       gcc_assert (l >= RESERVED_LOCATION_COUNT);
 304       expanded_location xloc = expand_location (l);
 305       r = xloc.line;
 306     }
 307   return r;
 308 }
 309
 310 /* Lookup the cache used for the content of a given file accessed by
 311    caret diagnostic.  Return the found cached file, or NULL if no
 312    cached file was found.  */
 313
 314 file_cache_slot *
 315 file_cache::lookup_file (const char *file_path)
 316 {
 317   gcc_assert (file_path);
 318
 319   /* This will contain the found cached file.  */
 320   file_cache_slot *r = NULL;
 321   for (unsigned i = 0; i < num_file_slots; ++i)
 322     {
 323       file_cache_slot *c = &m_file_slots[i];
 324       if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
 325         {
 326           c->inc_use_count ();
 327           r = c;
 328         }
 329     }
 330
 331   if (r)
 332     r->inc_use_count ();
 333
 334   return r;
 335 }
 336
 337 /* Purge any mention of FILENAME from the cache of files used for
 338    printing source code.  For use in selftests when working
 339    with tempfiles.  */
 340
 341 void
 342 file_cache::forcibly_evict_file (const char *file_path)
 343 {
 344   gcc_assert (file_path);
 345
 346   file_cache_slot *r = lookup_file (file_path);
 347   if (!r)
 348     /* Not found.  */
 349     return;
 350
 351   r->evict ();
 352 }
 353
 354 /* Determine if FILE_PATH missing a trailing newline on its final line.
 355    Only valid to call once all of the file has been loaded, by
 356    requesting a line number beyond the end of the file.  */
 357
 358 bool
 359 file_cache::missing_trailing_newline_p (const char *file_path)
 360 {
 361   gcc_assert (file_path);
 362
 363   file_cache_slot *r = lookup_or_add_file (file_path);
 364   return r->missing_trailing_newline_p ();
 365 }
 366
 367 void
 368 file_cache_slot::evict ()
 369 {
 370   m_file_path = NULL;
 371   if (m_fp)
 372     fclose (m_fp);
 373   m_fp = NULL;
 374   m_nb_read = 0;
 375   m_line_start_idx = 0;
 376   m_line_num = 0;
 377   m_line_record.truncate (0);
 378   m_use_count = 0;
 379   m_total_lines = 0;
 380   m_missing_trailing_newline = true;
 381 }
 382
 383 /* Return the file cache that has been less used, recently, or the
 384    first empty one.  If HIGHEST_USE_COUNT is non-null,
 385    *HIGHEST_USE_COUNT is set to the highest use count of the entries
 386    in the cache table.  */
 387
 388 file_cache_slot*
 389 file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
 390 {
 391   file_cache_slot *to_evict = &m_file_slots[0];
 392   unsigned huc = to_evict->get_use_count ();
 393   for (unsigned i = 1; i < num_file_slots; ++i)
 394     {
 395       file_cache_slot *c = &m_file_slots[i];
 396       bool c_is_empty = (c->get_file_path () == NULL);
 397
 398       if (c->get_use_count () < to_evict->get_use_count ()
 399           || (to_evict->get_file_path () && c_is_empty))
 400         /* We evict C because it's either an entry with a lower use
 401            count or one that is empty.  */
 402         to_evict = c;
 403
 404       if (huc < c->get_use_count ())
 405         huc = c->get_use_count ();
 406
 407       if (c_is_empty)
 408         /* We've reached the end of the cache; subsequent elements are
 409            all empty.  */
 410         break;
 411     }
 412
 413   if (highest_use_count)
 414     *highest_use_count = huc;
 415
 416   return to_evict;
 417 }
 418
 419 /* Create the cache used for the content of a given file to be
 420    accessed by caret diagnostic.  This cache is added to an array of
 421    cache and can be retrieved by lookup_file_in_cache_tab.  This
 422    function returns the created cache.  Note that only the last
 423    num_file_slots files are cached.
 424
 425    This can return nullptr if the FILE_PATH can't be opened for
 426    reading, or if the content can't be converted to the input_charset.  */
 427
 428 file_cache_slot*
 429 file_cache::add_file (const char *file_path)
 430 {
 431
 432   FILE *fp = fopen (file_path, "r");
 433   if (fp == NULL)
 434     return NULL;
 435
 436   unsigned highest_use_count = 0;
 437   file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
 438   if (!r->create (in_context, file_path, fp, highest_use_count))
 439     return NULL;
 440   return r;
 441 }
 442
 443 /* Get a borrowed char_span to the full content of this file
 444    as decoded according to the input charset, encoded as UTF-8.  */
 445
 446 char_span
 447 file_cache_slot::get_full_file_content ()
 448 {
 449   char *line;
 450   ssize_t line_len;
 451   while (get_next_line (&line, &line_len))
 452     {
 453     }
 454   return char_span (m_data, m_nb_read);
 455 }
 456
 457 /* Populate this slot for use on FILE_PATH and FP, dropping any
 458    existing cached content within it.  */
 459
 460 bool
 461 file_cache_slot::create (const file_cache::input_context &in_context,
 462                          const char *file_path, FILE *fp,
 463                          unsigned highest_use_count)
 464 {
 465   m_file_path = file_path;
 466   if (m_fp)
 467     fclose (m_fp);
 468   m_fp = fp;
 469   if (m_alloc_offset)
 470     offset_buffer (-m_alloc_offset);
 471   m_nb_read = 0;
 472   m_line_start_idx = 0;
 473   m_line_num = 0;
 474   m_line_record.truncate (0);
 475   /* Ensure that this cache entry doesn't get evicted next time
 476      add_file_to_cache_tab is called.  */
 477   m_use_count = ++highest_use_count;
 478   m_total_lines = total_lines_num (file_path);
 479   m_missing_trailing_newline = true;
 480
 481
 482   /* Check the input configuration to determine if we need to do any
 483      transformations, such as charset conversion or BOM skipping.  */
 484   if (const char *input_charset = in_context.ccb (file_path))
 485     {
 486       /* Need a full-blown conversion of the input charset.  */
 487       fclose (m_fp);
 488       m_fp = NULL;
 489       const cpp_converted_source cs
 490         = cpp_get_converted_source (file_path, input_charset);
 491       if (!cs.data)
 492         return false;
 493       if (m_data)
 494         XDELETEVEC (m_data);
 495       m_data = cs.data;
 496       m_nb_read = m_size = cs.len;
 497       m_alloc_offset = cs.data - cs.to_free;
 498     }
 499   else if (in_context.should_skip_bom)
 500     {
 501       if (read_data ())
 502         {
 503           const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
 504           offset_buffer (offset);
 505           m_nb_read -= offset;
 506         }
 507     }
 508
 509   return true;
 510 }
 511
 512 /* file_cache's ctor.  */
 513
 514 file_cache::file_cache ()
 515 : m_file_slots (new file_cache_slot[num_file_slots])
 516 {
 517   initialize_input_context (nullptr, false);
 518 }
 519
 520 /* file_cache's dtor.  */
 521
 522 file_cache::~file_cache ()
 523 {
 524   delete[] m_file_slots;
 525 }
 526
 527 /* Lookup the cache used for the content of a given file accessed by
 528    caret diagnostic.  If no cached file was found, create a new cache
 529    for this file, add it to the array of cached file and return
 530    it.
 531
 532    This can return nullptr on a cache miss if FILE_PATH can't be opened for
 533    reading, or if the content can't be converted to the input_charset.  */
 534
 535 file_cache_slot*
 536 file_cache::lookup_or_add_file (const char *file_path)
 537 {
 538   file_cache_slot *r = lookup_file (file_path);
 539   if (r == NULL)
 540     r = add_file (file_path);
 541   return r;
 542 }
 543
 544 /* Default constructor for a cache of file used by caret
 545    diagnostic.  */
 546
 547 file_cache_slot::file_cache_slot ()
 548 : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
 549   m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
 550   m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true)
 551 {
 552   m_line_record.create (0);
 553 }
 554
 555 /* Destructor for a cache of file used by caret diagnostic.  */
 556
 557 file_cache_slot::~file_cache_slot ()
 558 {
 559   if (m_fp)
 560     {
 561       fclose (m_fp);
 562       m_fp = NULL;
 563     }
 564   if (m_data)
 565     {
 566       offset_buffer (-m_alloc_offset);
 567       XDELETEVEC (m_data);
 568       m_data = 0;
 569     }
 570   m_line_record.release ();
 571 }
 572
 573 /* Returns TRUE iff the cache would need to be filled with data coming
 574    from the file.  That is, either the cache is empty or full or the
 575    current line is empty.  Note that if the cache is full, it would
 576    need to be extended and filled again.  */
 577
 578 bool
 579 file_cache_slot::needs_read_p () const
 580 {
 581   return m_fp && (m_nb_read == 0
 582           || m_nb_read == m_size
 583           || (m_line_start_idx >= m_nb_read - 1));
 584 }
 585
 586 /*  Return TRUE iff the cache is full and thus needs to be
 587     extended.  */
 588
 589 bool
 590 file_cache_slot::needs_grow_p () const
 591 {
 592   return m_nb_read == m_size;
 593 }
 594
 595 /* Grow the cache if it needs to be extended.  */
 596
 597 void
 598 file_cache_slot::maybe_grow ()
 599 {
 600   if (!needs_grow_p ())
 601     return;
 602
 603   if (!m_data)
 604     {
 605       gcc_assert (m_size == 0 && m_alloc_offset == 0);
 606       m_size = buffer_size;
 607       m_data = XNEWVEC (char, m_size);
 608     }
 609   else
 610     {
 611       const int offset = m_alloc_offset;
 612       offset_buffer (-offset);
 613       m_size *= 2;
 614       m_data = XRESIZEVEC (char, m_data, m_size);
 615       offset_buffer (offset);
 616     }
 617 }
 618
 619 /*  Read more data into the cache.  Extends the cache if need be.
 620     Returns TRUE iff new data could be read.  */
 621
 622 bool
 623 file_cache_slot::read_data ()
 624 {
 625   if (feof (m_fp) || ferror (m_fp))
 626     return false;
 627
 628   maybe_grow ();
 629
 630   char * from = m_data + m_nb_read;
 631   size_t to_read = m_size - m_nb_read;
 632   size_t nb_read = fread (from, 1, to_read, m_fp);
 633
 634   if (ferror (m_fp))
 635     return false;
 636
 637   m_nb_read += nb_read;
 638   return !!nb_read;
 639 }
 640
 641 /* Read new data iff the cache needs to be filled with more data
 642    coming from the file FP.  Return TRUE iff the cache was filled with
 643    mode data.  */
 644
 645 bool
 646 file_cache_slot::maybe_read_data ()
 647 {
 648   if (!needs_read_p ())
 649     return false;
 650   return read_data ();
 651 }
 652
 653 /* Helper function for file_cache_slot::get_next_line (), to find the end of
 654    the next line.  Returns with the memchr convention, i.e. nullptr if a line
 655    terminator was not found.  We need to determine line endings in the same
 656    manner that libcpp does: any of \n, \r\n, or \r is a line ending.  */
 657
 658 static char *
 659 find_end_of_line (char *s, size_t len)
 660 {
 661   for (const auto end = s + len; s != end; ++s)
 662     {
 663       if (*s == '\n')
 664         return s;
 665       if (*s == '\r')
 666         {
 667           const auto next = s + 1;
 668           if (next == end)
 669             {
 670               /* Don't find the line ending if \r is the very last character
 671                  in the buffer; we do not know if it's the end of the file or
 672                  just the end of what has been read so far, and we wouldn't
 673                  want to break in the middle of what's actually a \r\n
 674                  sequence.  Instead, we will handle the case of a file ending
 675                  in a \r later.  */
 676               break;
 677             }
 678           return (*next == '\n' ? next : s);
 679         }
 680     }
 681   return nullptr;
 682 }
 683
 684 /* Read a new line from file FP, using C as a cache for the data
 685    coming from the file.  Upon successful completion, *LINE is set to
 686    the beginning of the line found.  *LINE points directly in the
 687    line cache and is only valid until the next call of get_next_line.
 688    *LINE_LEN is set to the length of the line.  Note that the line
 689    does not contain any terminal delimiter.  This function returns
 690    true if some data was read or process from the cache, false
 691    otherwise.  Note that subsequent calls to get_next_line might
 692    make the content of *LINE invalid.  */
 693
 694 bool
 695 file_cache_slot::get_next_line (char **line, ssize_t *line_len)
 696 {
 697   /* Fill the cache with data to process.  */
 698   maybe_read_data ();
 699
 700   size_t remaining_size = m_nb_read - m_line_start_idx;
 701   if (remaining_size == 0)
 702     /* There is no more data to process.  */
 703     return false;
 704
 705   char *line_start = m_data + m_line_start_idx;
 706
 707   char *next_line_start = NULL;
 708   size_t len = 0;
 709   char *line_end = find_end_of_line (line_start, remaining_size);
 710   if (line_end == NULL)
 711     {
 712       /* We haven't found an end-of-line delimiter in the cache.
 713          Fill the cache with more data from the file and look again.  */
 714       while (maybe_read_data ())
 715         {
 716           line_start = m_data + m_line_start_idx;
 717           remaining_size = m_nb_read - m_line_start_idx;
 718           line_end = find_end_of_line (line_start, remaining_size);
 719           if (line_end != NULL)
 720             {
 721               next_line_start = line_end + 1;
 722               break;
 723             }
 724         }
 725       if (line_end == NULL)
 726         {
 727           /* We've loaded all the file into the cache and still no
 728              terminator.  Let's say the line ends up at one byte past the
 729              end of the file.  This is to stay consistent with the case
 730              of when the line ends up with a terminator and line_end points to
 731              that.  That consistency is useful below in the len calculation.
 732
 733              If the file ends in a \r, we didn't identify it as a line
 734              terminator above, so do that now instead.  */
 735           line_end = m_data + m_nb_read;
 736           if (m_nb_read && line_end[-1] == '\r')
 737             {
 738               --line_end;
 739               m_missing_trailing_newline = false;
 740             }
 741           else
 742             m_missing_trailing_newline = true;
 743         }
 744       else
 745         m_missing_trailing_newline = false;
 746     }
 747   else
 748     {
 749       next_line_start = line_end + 1;
 750       m_missing_trailing_newline = false;
 751     }
 752
 753   if (m_fp && ferror (m_fp))
 754     return false;
 755
 756   /* At this point, we've found the end of the of line.  It either points to
 757      the line terminator or to one byte after the last byte of the file.  */
 758   gcc_assert (line_end != NULL);
 759
 760   len = line_end - line_start;
 761
 762   if (m_line_start_idx < m_nb_read)
 763     *line = line_start;
 764
 765   ++m_line_num;
 766
 767   /* Before we update our line record, make sure the hint about the
 768      total number of lines of the file is correct.  If it's not, then
 769      we give up recording line boundaries from now on.  */
 770   bool update_line_record = true;
 771   if (m_line_num > m_total_lines)
 772     update_line_record = false;
 773
 774     /* Now update our line record so that re-reading lines from the
 775      before m_line_start_idx is faster.  */
 776   if (update_line_record
 777       && m_line_record.length () < line_record_size)
 778     {
 779       /* If the file lines fits in the line record, we just record all
 780          its lines ...*/
 781       if (m_total_lines <= line_record_size
 782           && m_line_num > m_line_record.length ())
 783         m_line_record.safe_push
 784           (file_cache_slot::line_info (m_line_num,
 785                                        m_line_start_idx,
 786                                        line_end - m_data));
 787       else if (m_total_lines > line_record_size)
 788         {
 789           /* ... otherwise, we just scale total_lines down to
 790              (line_record_size lines.  */
 791           size_t n = (m_line_num * line_record_size) / m_total_lines;
 792           if (m_line_record.length () == 0
 793               || n >= m_line_record.length ())
 794             m_line_record.safe_push
 795               (file_cache_slot::line_info (m_line_num,
 796                                            m_line_start_idx,
 797                                            line_end - m_data));
 798         }
 799     }
 800
 801   /* Update m_line_start_idx so that it points to the next line to be
 802      read.  */
 803   if (next_line_start)
 804     m_line_start_idx = next_line_start - m_data;
 805   else
 806     /* We didn't find any terminal '\n'.  Let's consider that the end
 807        of line is the end of the data in the cache.  The next
 808        invocation of get_next_line will either read more data from the
 809        underlying file or return false early because we've reached the
 810        end of the file.  */
 811     m_line_start_idx = m_nb_read;
 812
 813   *line_len = len;
 814
 815   return true;
 816 }
 817
 818 /* Consume the next bytes coming from the cache (or from its
 819    underlying file if there are remaining unread bytes in the file)
 820    until we reach the next end-of-line (or end-of-file).  There is no
 821    copying from the cache involved.  Return TRUE upon successful
 822    completion.  */
 823
 824 bool
 825 file_cache_slot::goto_next_line ()
 826 {
 827   char *l;
 828   ssize_t len;
 829
 830   return get_next_line (&l, &len);
 831 }
 832
 833 /* Read an arbitrary line number LINE_NUM from the file cached in C.
 834    If the line was read successfully, *LINE points to the beginning
 835    of the line in the file cache and *LINE_LEN is the length of the
 836    line.  *LINE is not nul-terminated, but may contain zero bytes.
 837    *LINE is only valid until the next call of read_line_num.
 838    This function returns bool if a line was read.  */
 839
 840 bool
 841 file_cache_slot::read_line_num (size_t line_num,
 842                        char ** line, ssize_t *line_len)
 843 {
 844   gcc_assert (line_num > 0);
 845
 846   if (line_num <= m_line_num)
 847     {
 848       /* We've been asked to read lines that are before m_line_num.
 849          So lets use our line record (if it's not empty) to try to
 850          avoid re-reading the file from the beginning again.  */
 851
 852       if (m_line_record.is_empty ())
 853         {
 854           m_line_start_idx = 0;
 855           m_line_num = 0;
 856         }
 857       else
 858         {
 859           file_cache_slot::line_info *i = NULL;
 860           if (m_total_lines <= line_record_size)
 861             {
 862               /* In languages where the input file is not totally
 863                  preprocessed up front, the m_total_lines hint
 864                  can be smaller than the number of lines of the
 865                  file.  In that case, only the first
 866                  m_total_lines have been recorded.
 867
 868                  Otherwise, the first m_total_lines we've read have
 869                  their start/end recorded here.  */
 870               i = (line_num <= m_total_lines)
 871                 ? &m_line_record[line_num - 1]
 872                 : &m_line_record[m_total_lines - 1];
 873               gcc_assert (i->line_num <= line_num);
 874             }
 875           else
 876             {
 877               /*  So the file had more lines than our line record
 878                   size.  Thus the number of lines we've recorded has
 879                   been scaled down to line_record_size.  Let's
 880                   pick the start/end of the recorded line that is
 881                   closest to line_num.  */
 882               size_t n = (line_num <= m_total_lines)
 883                 ? line_num * line_record_size / m_total_lines
 884                 : m_line_record.length () - 1;
 885               if (n < m_line_record.length ())
 886                 {
 887                   i = &m_line_record[n];
 888                   gcc_assert (i->line_num <= line_num);
 889                 }
 890             }
 891
 892           if (i && i->line_num == line_num)
 893             {
 894               /* We have the start/end of the line.  */
 895               *line = m_data + i->start_pos;
 896               *line_len = i->end_pos - i->start_pos;
 897               return true;
 898             }
 899
 900           if (i)
 901             {
 902               m_line_start_idx = i->start_pos;
 903               m_line_num = i->line_num - 1;
 904             }
 905           else
 906             {
 907               m_line_start_idx = 0;
 908               m_line_num = 0;
 909             }
 910         }
 911     }
 912
 913   /*  Let's walk from line m_line_num up to line_num - 1, without
 914       copying any line.  */
 915   while (m_line_num < line_num - 1)
 916     if (!goto_next_line ())
 917       return false;
 918
 919   /* The line we want is the next one.  Let's read and copy it back to
 920      the caller.  */
 921   return get_next_line (line, line_len);
 922 }
 923
 924 /* Return the physical source line that corresponds to FILE_PATH/LINE.
 925    The line is not nul-terminated.  The returned pointer is only
 926    valid until the next call of location_get_source_line.
 927    Note that the line can contain several null characters,
 928    so the returned value's length has the actual length of the line.
 929    If the function fails, a NULL char_span is returned.  */
 930
 931 char_span
 932 file_cache::get_source_line (const char *file_path, int line)
 933 {
 934   char *buffer = NULL;
 935   ssize_t len;
 936
 937   if (line == 0)
 938     return char_span (NULL, 0);
 939
 940   if (file_path == NULL)
 941     return char_span (NULL, 0);
 942
 943   file_cache_slot *c = lookup_or_add_file (file_path);
 944   if (c == NULL)
 945     return char_span (NULL, 0);
 946
 947   bool read = c->read_line_num (line, &buffer, &len);
 948   if (!read)
 949     return char_span (NULL, 0);
 950
 951   return char_span (buffer, len);
 952 }
 953
 954 /* Return a NUL-terminated copy of the source text between two locations, or
 955    NULL if the arguments are invalid.  The caller is responsible for freeing
 956    the return value.  */
 957
 958 char *
 959 get_source_text_between (file_cache &fc, location_t start, location_t end)
 960 {
 961   expanded_location expstart =
 962     expand_location_to_spelling_point (start, LOCATION_ASPECT_START);
 963   expanded_location expend =
 964     expand_location_to_spelling_point (end, LOCATION_ASPECT_FINISH);
 965
 966   /* If the locations are in different files or the end comes before the
 967      start, give up and return nothing.  */
 968   if (!expstart.file || !expend.file)
 969     return NULL;
 970   if (strcmp (expstart.file, expend.file) != 0)
 971     return NULL;
 972   if (expstart.line > expend.line)
 973     return NULL;
 974   if (expstart.line == expend.line
 975       && expstart.column > expend.column)
 976     return NULL;
 977   /* These aren't real column numbers, give up.  */
 978   if (expstart.column == 0 || expend.column == 0)
 979     return NULL;
 980
 981   /* For a single line we need to trim both edges.  */
 982   if (expstart.line == expend.line)
 983     {
 984       char_span line = fc.get_source_line (expstart.file, expstart.line);
 985       if (line.length () < 1)
 986         return NULL;
 987       int s = expstart.column - 1;
 988       int len = expend.column - s;
 989       if (line.length () < (size_t)expend.column)
 990         return NULL;
 991       return line.subspan (s, len).xstrdup ();
 992     }
 993
 994   struct obstack buf_obstack;
 995   obstack_init (&buf_obstack);
 996
 997   /* Loop through all lines in the range and append each to buf; may trim
 998      parts of the start and end lines off depending on column values.  */
 999   for (int lnum = expstart.line; lnum <= expend.line; ++lnum)
1000     {
1001       char_span line = fc.get_source_line (expstart.file, lnum);
1002       if (line.length () < 1 && (lnum != expstart.line && lnum != expend.line))
1003         continue;
1004
1005       /* For the first line in the range, only start at expstart.column */
1006       if (lnum == expstart.line)
1007         {
1008           unsigned off = expstart.column - 1;
1009           if (line.length () < off)
1010             return NULL;
1011           line = line.subspan (off, line.length() - off);
1012         }
1013       /* For the last line, don't go past expend.column */
1014       else if (lnum == expend.line)
1015         {
1016           if (line.length () < (size_t)expend.column)
1017             return NULL;
1018           line = line.subspan (0, expend.column);
1019         }
1020
1021       /* Combine spaces at the beginning of later lines.  */
1022       if (lnum > expstart.line)
1023         {
1024           unsigned off;
1025           for (off = 0; off < line.length(); ++off)
1026             if (line[off] != ' ' && line[off] != '\t')
1027               break;
1028           if (off > 0)
1029             {
1030               obstack_1grow (&buf_obstack, ' ');
1031               line = line.subspan (off, line.length() - off);
1032             }
1033         }
1034
1035       /* This does not include any trailing newlines.  */
1036       obstack_grow (&buf_obstack, line.get_buffer (), line.length ());
1037     }
1038
1039   /* NUL-terminate and finish the buf obstack.  */
1040   obstack_1grow (&buf_obstack, 0);
1041   const char *buf = (const char *) obstack_finish (&buf_obstack);
1042
1043   return xstrdup (buf);
1044 }
1045
1046
1047 char_span
1048 file_cache::get_source_file_content (const char *file_path)
1049 {
1050   file_cache_slot *c = lookup_or_add_file (file_path);
1051   if (c == nullptr)
1052     return char_span (nullptr, 0);
1053   return c->get_full_file_content ();
1054 }
1055
1056 /* Test if the location originates from the spelling location of a
1057    builtin-tokens.  That is, return TRUE if LOC is a (possibly
1058    virtual) location of a built-in token that appears in the expansion
1059    list of a macro.  Please note that this function also works on
1060    tokens that result from built-in tokens.  For instance, the
1061    function would return true if passed a token "4" that is the result
1062    of the expansion of the built-in __LINE__ macro.  */
1063 bool
1064 is_location_from_builtin_token (location_t loc)
1065 {
1066   const line_map_ordinary *map = NULL;
1067   loc = linemap_resolve_location (line_table, loc,
1068                                   LRK_SPELLING_LOCATION, &map);
1069   return loc == BUILTINS_LOCATION;
1070 }
1071
1072 /* Expand the source location LOC into a human readable location.  If
1073    LOC is virtual, it resolves to the expansion point of the involved
1074    macro.  If LOC resolves to a builtin location, the file name of the
1075    readable location is set to the string "<built-in>".  */
1076
1077 expanded_location
1078 expand_location (location_t loc)
1079 {
1080   return expand_location_1 (line_table, loc, /*expansion_point_p=*/true,
1081                             LOCATION_ASPECT_CARET);
1082 }
1083
1084 /* Expand the source location LOC into a human readable location.  If
1085    LOC is virtual, it resolves to the expansion location of the
1086    relevant macro.  If LOC resolves to a builtin location, the file
1087    name of the readable location is set to the string
1088    "<built-in>".  */
1089
1090 expanded_location
1091 expand_location_to_spelling_point (location_t loc,
1092                                    enum location_aspect aspect)
1093 {
1094   return expand_location_1 (line_table, loc, /*expansion_point_p=*/false,
1095                             aspect);
1096 }
1097
1098 /* The rich_location class within libcpp requires a way to expand
1099    location_t instances, and relies on the client code
1100    providing a symbol named
1101      linemap_client_expand_location_to_spelling_point
1102    to do this.
1103
1104    This is the implementation for libcommon.a (all host binaries),
1105    which simply calls into expand_location_1.  */
1106
1107 expanded_location
1108 linemap_client_expand_location_to_spelling_point (const line_maps *set,
1109                                                   location_t loc,
1110                                                   enum location_aspect aspect)
1111 {
1112   return expand_location_1 (set, loc, /*expansion_point_p=*/false, aspect);
1113 }
1114
1115
1116 /* If LOCATION is in a system header and if it is a virtual location
1117    for a token coming from the expansion of a macro, unwind it to
1118    the location of the expansion point of the macro.  If the expansion
1119    point is also in a system header return the original LOCATION.
1120    Otherwise, return the location of the expansion point.
1121
1122    This is used for instance when we want to emit diagnostics about a
1123    token that may be located in a macro that is itself defined in a
1124    system header, for example, for the NULL macro.  In such a case, if
1125    LOCATION were passed directly to diagnostic functions such as
1126    warning_at, the diagnostic would be suppressed (unless
1127    -Wsystem-headers).  */
1128
1129 location_t
1130 expansion_point_location_if_in_system_header (location_t location)
1131 {
1132   if (!in_system_header_at (location))
1133     return location;
1134
1135   location_t xloc = linemap_resolve_location (line_table, location,
1136                                               LRK_MACRO_EXPANSION_POINT,
1137                                               NULL);
1138   return in_system_header_at (xloc) ? location : xloc;
1139 }
1140
1141 /* If LOCATION is a virtual location for a token coming from the expansion
1142    of a macro, unwind to the location of the expansion point of the macro.  */
1143
1144 location_t
1145 expansion_point_location (location_t location)
1146 {
1147   return linemap_resolve_location (line_table, location,
1148                                    LRK_MACRO_EXPANSION_POINT, NULL);
1149 }
1150
1151 /* Construct a location with caret at CARET, ranging from START to
1152    FINISH.
1153
1154    For example, consider:
1155
1156                  11111111112
1157         12345678901234567890
1158      522
1159      523   return foo + bar;
1160                   ~~~~^~~~~
1161      524
1162
1163    The location's caret is at the "+", line 523 column 15, but starts
1164    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
1165    of "bar" at column 19.  */
1166
1167 location_t
1168 make_location (location_t caret, location_t start, location_t finish)
1169 {
1170   return line_table->make_location (caret, start, finish);
1171 }
1172
1173 /* Same as above, but taking a source range rather than two locations.  */
1174
1175 location_t
1176 make_location (location_t caret, source_range src_range)
1177 {
1178   location_t pure_loc = get_pure_location (caret);
1179   return line_table->get_or_create_combined_loc (pure_loc, src_range,
1180                                                  nullptr, 0);
1181 }
1182
1183 /* An expanded_location stores the column in byte units.  This function
1184    converts that column to display units.  That requires reading the associated
1185    source line in order to calculate the display width.  If that cannot be done
1186    for any reason, then returns the byte column as a fallback.  */
1187 int
1188 location_compute_display_column (file_cache &fc,
1189                                  expanded_location exploc,
1190                                  const cpp_char_column_policy &policy)
1191 {
1192   if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
1193     return exploc.column;
1194   char_span line = fc.get_source_line (exploc.file, exploc.line);
1195   /* If line is NULL, this function returns exploc.column which is the
1196      desired fallback.  */
1197   return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
1198                                             exploc.column, policy);
1199 }
1200
1201 /* Dump statistics to stderr about the memory usage of the line_table
1202    set of line maps.  This also displays some statistics about macro
1203    expansion.  */
1204
1205 void
1206 dump_line_table_statistics (void)
1207 {
1208   struct linemap_stats s;
1209   long total_used_map_size,
1210     macro_maps_size,
1211     total_allocated_map_size;
1212
1213   memset (&s, 0, sizeof (s));
1214
1215   linemap_get_statistics (line_table, &s);
1216
1217   macro_maps_size = s.macro_maps_used_size
1218     + s.macro_maps_locations_size;
1219
1220   total_allocated_map_size = s.ordinary_maps_allocated_size
1221     + s.macro_maps_allocated_size
1222     + s.macro_maps_locations_size;
1223
1224   total_used_map_size = s.ordinary_maps_used_size
1225     + s.macro_maps_used_size
1226     + s.macro_maps_locations_size;
1227
1228   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
1229            s.num_expanded_macros);
1230   if (s.num_expanded_macros != 0)
1231     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
1232              s.num_macro_tokens / s.num_expanded_macros);
1233   fprintf (stderr,
1234            "\nLine Table allocations during the "
1235            "compilation process\n");
1236   fprintf (stderr, "Number of ordinary maps used:        " PRsa (5) "\n",
1237            SIZE_AMOUNT (s.num_ordinary_maps_used));
1238   fprintf (stderr, "Ordinary map used size:              " PRsa (5) "\n",
1239            SIZE_AMOUNT (s.ordinary_maps_used_size));
1240   fprintf (stderr, "Number of ordinary maps allocated:   " PRsa (5) "\n",
1241            SIZE_AMOUNT (s.num_ordinary_maps_allocated));
1242   fprintf (stderr, "Ordinary maps allocated size:        " PRsa (5) "\n",
1243            SIZE_AMOUNT (s.ordinary_maps_allocated_size));
1244   fprintf (stderr, "Number of macro maps used:           " PRsa (5) "\n",
1245            SIZE_AMOUNT (s.num_macro_maps_used));
1246   fprintf (stderr, "Macro maps used size:                " PRsa (5) "\n",
1247            SIZE_AMOUNT (s.macro_maps_used_size));
1248   fprintf (stderr, "Macro maps locations size:           " PRsa (5) "\n",
1249            SIZE_AMOUNT (s.macro_maps_locations_size));
1250   fprintf (stderr, "Macro maps size:                     " PRsa (5) "\n",
1251            SIZE_AMOUNT (macro_maps_size));
1252   fprintf (stderr, "Duplicated maps locations size:      " PRsa (5) "\n",
1253            SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
1254   fprintf (stderr, "Total allocated maps size:           " PRsa (5) "\n",
1255            SIZE_AMOUNT (total_allocated_map_size));
1256   fprintf (stderr, "Total used maps size:                " PRsa (5) "\n",
1257            SIZE_AMOUNT (total_used_map_size));
1258   fprintf (stderr, "Ad-hoc table size:                   " PRsa (5) "\n",
1259            SIZE_AMOUNT (s.adhoc_table_size));
1260   fprintf (stderr, "Ad-hoc table entries used:           " PRsa (5) "\n",
1261            SIZE_AMOUNT (s.adhoc_table_entries_used));
1262   fprintf (stderr, "optimized_ranges:                    " PRsa (5) "\n",
1263            SIZE_AMOUNT (line_table->m_num_optimized_ranges));
1264   fprintf (stderr, "unoptimized_ranges:                  " PRsa (5) "\n",
1265            SIZE_AMOUNT (line_table->m_num_unoptimized_ranges));
1266
1267   fprintf (stderr, "\n");
1268 }
1269
1270 /* Get location one beyond the final location in ordinary map IDX.  */
1271
1272 static location_t
1273 get_end_location (class line_maps *set, unsigned int idx)
1274 {
1275   if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1276     return set->highest_location;
1277
1278   struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1279   return MAP_START_LOCATION (next_map);
1280 }
1281
/* Helper function for write_digit_row.
   Write the units digit of DIGIT (i.e. DIGIT modulo 10) to STREAM as a
   single character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1289
1290 /* Helper function for dump_location_info.
1291    Write a row of numbers to STREAM, numbering a source line,
1292    giving the units, tens, hundreds etc of the column number.  */
1293
1294 static void
1295 write_digit_row (FILE *stream, int indent,
1296                  const line_map_ordinary *map,
1297                  location_t loc, int max_col, int divisor)
1298 {
1299   fprintf (stream, "%*c", indent, ' ');
1300   fprintf (stream, "|");
1301   for (int column = 1; column < max_col; column++)
1302     {
1303       location_t column_loc = loc + (column << map->m_range_bits);
1304       write_digit (stream, column_loc / divisor);
1305     }
1306   fprintf (stream, "\n");
1307 }
1308
1309 /* Write a half-closed (START) / half-open (END) interval of
1310    location_t to STREAM.  */
1311
1312 static void
1313 dump_location_range (FILE *stream,
1314                      location_t start, location_t end)
1315 {
1316   fprintf (stream,
1317            "  location_t interval: %u <= loc < %u\n",
1318            start, end);
1319 }
1320
1321 /* Write a labelled description of a half-closed (START) / half-open (END)
1322    interval of location_t to STREAM.  */
1323
1324 static void
1325 dump_labelled_location_range (FILE *stream,
1326                               const char *name,
1327                               location_t start, location_t end)
1328 {
1329   fprintf (stream, "%s\n", name);
1330   dump_location_range (stream, start, end);
1331   fprintf (stream, "\n");
1332 }
1333
1334 /* Write a visualization of the locations in the line_table to STREAM.  */
1335
1336 void
1337 dump_location_info (FILE *stream)
1338 {
1339   file_cache fc;
1340
1341   /* Visualize the reserved locations.  */
1342   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1343                                 0, RESERVED_LOCATION_COUNT);
1344
1345   /* Visualize the ordinary line_map instances, rendering the sources. */
1346   for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1347     {
1348       location_t end_location = get_end_location (line_table, idx);
1349       /* half-closed: doesn't include this one. */
1350
1351       const line_map_ordinary *map
1352         = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1353       fprintf (stream, "ORDINARY MAP: %i\n", idx);
1354       dump_location_range (stream,
1355                            MAP_START_LOCATION (map), end_location);
1356       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1357       fprintf (stream, "  starting at line: %i\n",
1358                ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1359       fprintf (stream, "  column and range bits: %i\n",
1360                map->m_column_and_range_bits);
1361       fprintf (stream, "  column bits: %i\n",
1362                map->m_column_and_range_bits - map->m_range_bits);
1363       fprintf (stream, "  range bits: %i\n",
1364                map->m_range_bits);
1365       const char * reason;
1366       switch (map->reason) {
1367       case LC_ENTER:
1368         reason = "LC_ENTER";
1369         break;
1370       case LC_LEAVE:
1371         reason = "LC_LEAVE";
1372         break;
1373       case LC_RENAME:
1374         reason = "LC_RENAME";
1375         break;
1376       case LC_RENAME_VERBATIM:
1377         reason = "LC_RENAME_VERBATIM";
1378         break;
1379       case LC_ENTER_MACRO:
1380         reason = "LC_RENAME_MACRO";
1381         break;
1382       default:
1383         reason = "Unknown";
1384       }
1385       fprintf (stream, "  reason: %d (%s)\n", map->reason, reason);
1386
1387       const line_map_ordinary *includer_map
1388         = linemap_included_from_linemap (line_table, map);
1389       fprintf (stream, "  included from location: %d",
1390                linemap_included_from (map));
1391       if (includer_map) {
1392         fprintf (stream, " (in ordinary map %d)",
1393                  int (includer_map - line_table->info_ordinary.maps));
1394       }
1395       fprintf (stream, "\n");
1396
1397       /* Render the span of source lines that this "map" covers.  */
1398       for (location_t loc = MAP_START_LOCATION (map);
1399            loc < end_location;
1400            loc += (1 << map->m_range_bits) )
1401         {
1402           gcc_assert (pure_location_p (line_table, loc) );
1403
1404           expanded_location exploc
1405             = linemap_expand_location (line_table, map, loc);
1406
1407           if (exploc.column == 0)
1408             {
1409               /* Beginning of a new source line: draw the line.  */
1410
1411               char_span line_text = fc.get_source_line (exploc.file,
1412                                                         exploc.line);
1413               if (!line_text)
1414                 break;
1415               fprintf (stream,
1416                        "%s:%3i|loc:%5i|%.*s\n",
1417                        exploc.file, exploc.line,
1418                        loc,
1419                        (int)line_text.length (), line_text.get_buffer ());
1420
1421               /* "loc" is at column 0, which means "the whole line".
1422                  Render the locations *within* the line, by underlining
1423                  it, showing the location_t numeric values
1424                  at each column.  */
1425               size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1426               if (max_col > line_text.length ())
1427                 max_col = line_text.length () + 1;
1428
1429               int len_lnum = num_digits (exploc.line);
1430               if (len_lnum < 3)
1431                 len_lnum = 3;
1432               int len_loc = num_digits (loc);
1433               if (len_loc < 5)
1434                 len_loc = 5;
1435
1436               int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1437
1438               /* Thousands.  */
1439               if (end_location > 999)
1440                 write_digit_row (stream, indent, map, loc, max_col, 1000);
1441
1442               /* Hundreds.  */
1443               if (end_location > 99)
1444                 write_digit_row (stream, indent, map, loc, max_col, 100);
1445
1446               /* Tens.  */
1447               write_digit_row (stream, indent, map, loc, max_col, 10);
1448
1449               /* Units.  */
1450               write_digit_row (stream, indent, map, loc, max_col, 1);
1451             }
1452         }
1453       fprintf (stream, "\n");
1454     }
1455
1456   /* Visualize unallocated values.  */
1457   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1458                                 line_table->highest_location,
1459                                 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1460
1461   /* Visualize the macro line_map instances, rendering the sources. */
1462   for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1463     {
1464       /* Each macro map that is allocated owns location_t values
1465          that are *lower* that the one before them.
1466          Hence it's meaningful to view them either in order of ascending
1467          source locations, or in order of ascending macro map index.  */
1468       const bool ascending_location_ts = true;
1469       unsigned int idx = (ascending_location_ts
1470                           ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1471                           : i);
1472       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1473       fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1474                idx,
1475                linemap_map_get_macro_name (map),
1476                MACRO_MAP_NUM_MACRO_TOKENS (map));
1477       dump_location_range (stream,
1478                            map->start_location,
1479                            (map->start_location
1480                             + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1481       inform (map->get_expansion_point_location (),
1482               "expansion point is location %i",
1483               map->get_expansion_point_location ());
1484       fprintf (stream, "  map->start_location: %u\n",
1485                map->start_location);
1486
1487       fprintf (stream, "  macro_locations:\n");
1488       for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1489         {
1490           location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1491           location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1492
1493           /* linemap_add_macro_token encodes token numbers in an expansion
1494              by putting them after MAP_START_LOCATION. */
1495
1496           /* I'm typically seeing 4 uninitialized entries at the end of
1497              0xafafafaf.
1498              This appears to be due to macro.cc:replace_args
1499              adding 2 extra args for padding tokens; presumably there may
1500              be a leading and/or trailing padding token injected,
1501              each for 2 more location slots.
1502              This would explain there being up to 4 location_ts slots
1503              that may be uninitialized.  */
1504
1505           fprintf (stream, "    %u: %u, %u\n",
1506                    i,
1507                    x,
1508                    y);
1509           if (x == y)
1510             {
1511               if (x < MAP_START_LOCATION (map))
1512                 inform (x, "token %u has %<x-location == y-location == %u%>",
1513                         i, x);
1514               else
1515                 fprintf (stream,
1516                          "x-location == y-location == %u encodes token # %u\n",
1517                          x, x - MAP_START_LOCATION (map));
1518                 }
1519           else
1520             {
1521               inform (x, "token %u has %<x-location == %u%>", i, x);
1522               inform (x, "token %u has %<y-location == %u%>", i, y);
1523             }
1524         }
1525       fprintf (stream, "\n");
1526     }
1527
1528   /* It appears that MAX_LOCATION_T itself is never assigned to a
1529      macro map, presumably due to an off-by-one error somewhere
1530      between the logic in linemap_enter_macro and
1531      LINEMAPS_MACRO_LOWEST_LOCATION.  */
1532   dump_labelled_location_range (stream, "MAX_LOCATION_T",
1533                                 MAX_LOCATION_T,
1534                                 MAX_LOCATION_T + 1);
1535
1536   /* Visualize ad-hoc values.  */
1537   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1538                                 MAX_LOCATION_T + 1, UINT_MAX);
1539 }
1540
1541 /* string_concat's constructor.  */
1542
1543 string_concat::string_concat (int num, location_t *locs)
1544   : m_num (num)
1545 {
1546   m_locs = ggc_vec_alloc <location_t> (num);
1547   for (int i = 0; i < num; i++)
1548     m_locs[i] = locs[i];
1549 }
1550
1551 /* string_concat_db's constructor.  */
1552
1553 string_concat_db::string_concat_db ()
1554 {
1555   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1556 }
1557
1558 /* Record that a string concatenation occurred, covering NUM
1559    string literal tokens.  LOCS is an array of size NUM, containing the
1560    locations of the tokens.  A copy of LOCS is taken.  */
1561
1562 void
1563 string_concat_db::record_string_concatenation (int num, location_t *locs)
1564 {
1565   gcc_assert (num > 1);
1566   gcc_assert (locs);
1567
1568   location_t key_loc = get_key_loc (locs[0]);
1569   /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:
1570      any data now recorded under key 'key_loc' would be overwritten by a
1571      subsequent call with the same key 'key_loc'.  */
1572   if (RESERVED_LOCATION_P (key_loc))
1573     return;
1574
1575   string_concat *concat
1576     = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1577   m_table->put (key_loc, concat);
1578 }
1579
1580 /* Determine if LOC was the location of the initial token of a
1581    concatenation of string literal tokens.
1582    If so, *OUT_NUM is written to with the number of tokens, and
1583    *OUT_LOCS with the location of an array of locations of the
1584    tokens, and return true.  *OUT_LOCS is a borrowed pointer to
1585    storage owned by the string_concat_db.
1586    Otherwise, return false.  */
1587
1588 bool
1589 string_concat_db::get_string_concatenation (location_t loc,
1590                                             int *out_num,
1591                                             location_t **out_locs)
1592 {
1593   gcc_assert (out_num);
1594   gcc_assert (out_locs);
1595
1596   location_t key_loc = get_key_loc (loc);
1597   /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see
1598      discussion in 'string_concat_db::record_string_concatenation'.  */
1599   if (RESERVED_LOCATION_P (key_loc))
1600     return false;
1601
1602   string_concat **concat = m_table->get (key_loc);
1603   if (!concat)
1604     return false;
1605
1606   *out_num = (*concat)->m_num;
1607   *out_locs =(*concat)->m_locs;
1608   return true;
1609 }
1610
1611 /* Internal function.  Canonicalize LOC into a form suitable for
1612    use as a key within the database, stripping away macro expansion,
1613    ad-hoc information, and range information, using the location of
1614    the start of LOC within an ordinary linemap.  */
1615
1616 location_t
1617 string_concat_db::get_key_loc (location_t loc)
1618 {
1619   loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1620                                   NULL);
1621
1622   loc = get_range_from_loc (line_table, loc).m_start;
1623
1624   return loc;
1625 }
1626
1627 /* Helper class for use within get_substring_ranges_for_loc.
1628    An vec of cpp_string with responsibility for releasing all of the
1629    str->text for each str in the vector.  */
1630
1631 class auto_cpp_string_vec :  public auto_vec <cpp_string>
1632 {
1633  public:
1634   auto_cpp_string_vec (int alloc)
1635     : auto_vec <cpp_string> (alloc) {}
1636
1637   ~auto_cpp_string_vec ()
1638   {
1639     /* Clean up the copies within this vec.  */
1640     int i;
1641     cpp_string *str;
1642     FOR_EACH_VEC_ELT (*this, i, str)
1643       free (const_cast <unsigned char *> (str->text));
1644   }
1645 };
1646
1647 /* Attempt to populate RANGES with source location information on the
1648    individual characters within the string literal found at STRLOC.
1649    If CONCATS is non-NULL, then any string literals that the token at
1650    STRLOC  was concatenated with are also added to RANGES.
1651
1652    Return NULL if successful, or an error message if any errors occurred (in
1653    which case RANGES may be only partially populated and should not
1654    be used).
1655
1656    This is implemented by re-parsing the relevant source line(s).  */
1657
1658 static const char *
1659 get_substring_ranges_for_loc (cpp_reader *pfile,
1660                               file_cache &fc,
1661                               string_concat_db *concats,
1662                               location_t strloc,
1663                               enum cpp_ttype type,
1664                               cpp_substring_ranges &ranges)
1665 {
1666   gcc_assert (pfile);
1667
1668   if (strloc == UNKNOWN_LOCATION)
1669     return "unknown location";
1670
1671   /* Reparsing the strings requires accurate location information.
1672      If -ftrack-macro-expansion has been overridden from its default
1673      of 2, then we might have a location of a macro expansion point,
1674      rather than the location of the literal itself.
1675      Avoid this by requiring that we have full macro expansion tracking
1676      for substring locations to be available.  */
1677   if (cpp_get_options (pfile)->track_macro_expansion != 2)
1678     return "track_macro_expansion != 2";
1679
1680   /* If #line or # 44 "file"-style directives are present, then there's
1681      no guarantee that the line numbers we have can be used to locate
1682      the strings.  For example, we might have a .i file with # directives
1683      pointing back to lines within a .c file, but the .c file might
1684      have been edited since the .i file was created.
1685      In such a case, the safest course is to disable on-demand substring
1686      locations.  */
1687   if (line_table->seen_line_directive)
1688     return "seen line directive";
1689
1690   /* If string concatenation has occurred at STRLOC, get the locations
1691      of all of the literal tokens making up the compound string.
1692      Otherwise, just use STRLOC.  */
1693   int num_locs = 1;
1694   location_t *strlocs = &strloc;
1695   if (concats)
1696     concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1697
1698   auto_cpp_string_vec strs (num_locs);
1699   auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1700   for (int i = 0; i < num_locs; i++)
1701     {
1702       /* Get range of strloc.  We will use it to locate the start and finish
1703          of the literal token within the line.  */
1704       source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1705
1706       if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1707         {
1708           /* If the string token was within a macro expansion, then we can
1709              cope with it for the simple case where we have a single token.
1710              Otherwise, bail out.  */
1711           if (src_range.m_start != src_range.m_finish)
1712             return "macro expansion";
1713         }
1714       else
1715         {
1716           if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1717             /* If so, we can't reliably determine where the token started within
1718                its line.  */
1719             return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1720
1721           if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1722             /* If so, we can't reliably determine where the token finished
1723                within its line.  */
1724             return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1725         }
1726
1727       expanded_location start
1728         = expand_location_to_spelling_point (src_range.m_start,
1729                                              LOCATION_ASPECT_START);
1730       expanded_location finish
1731         = expand_location_to_spelling_point (src_range.m_finish,
1732                                              LOCATION_ASPECT_FINISH);
1733       if (start.file != finish.file)
1734         return "range endpoints are in different files";
1735       if (start.line != finish.line)
1736         return "range endpoints are on different lines";
1737       if (start.column > finish.column)
1738         return "range endpoints are reversed";
1739
1740       char_span line = fc.get_source_line (start.file, start.line);
1741       if (!line)
1742         return "unable to read source line";
1743
1744       /* Determine the location of the literal (including quotes
1745          and leading prefix chars, such as the 'u' in a u""
1746          token).  */
1747       size_t literal_length = finish.column - start.column + 1;
1748
1749       /* Ensure that we don't crash if we got the wrong location.  */
1750       if (start.column < 1)
1751         return "zero start column";
1752       if (line.length () < (start.column - 1 + literal_length))
1753         return "line is not wide enough";
1754
1755       char_span literal = line.subspan (start.column - 1, literal_length);
1756
1757       cpp_string from;
1758       from.len = literal_length;
1759       /* Make a copy of the literal, to avoid having to rely on
1760          the lifetime of the copy of the line within the cache.
1761          This will be released by the auto_cpp_string_vec dtor.  */
1762       from.text = (unsigned char *)literal.xstrdup ();
1763       strs.safe_push (from);
1764
1765       /* For very long lines, a new linemap could have started
1766          halfway through the token.
1767          Ensure that the loc_reader uses the linemap of the
1768          *end* of the token for its start location.  */
1769       const line_map_ordinary *start_ord_map;
1770       linemap_resolve_location (line_table, src_range.m_start,
1771                                 LRK_SPELLING_LOCATION, &start_ord_map);
1772       const line_map_ordinary *final_ord_map;
1773       linemap_resolve_location (line_table, src_range.m_finish,
1774                                 LRK_SPELLING_LOCATION, &final_ord_map);
1775       if (start_ord_map == NULL || final_ord_map == NULL)
1776         return "failed to get ordinary maps";
1777       /* Bulletproofing.  We ought to only have different ordinary maps
1778          for start vs finish due to line-length jumps.  */
1779       if (start_ord_map != final_ord_map
1780           && start_ord_map->to_file != final_ord_map->to_file)
1781         return "start and finish are spelled in different ordinary maps";
1782       /* The file from linemap_resolve_location ought to match that from
1783          expand_location_to_spelling_point.  */
1784       if (start_ord_map->to_file != start.file)
1785         return "mismatching file after resolving linemap";
1786
1787       location_t start_loc
1788         = linemap_position_for_line_and_column (line_table, final_ord_map,
1789                                                 start.line, start.column);
1790
1791       cpp_string_location_reader loc_reader (start_loc, line_table);
1792       loc_readers.safe_push (loc_reader);
1793     }
1794
1795   /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
1796   const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1797                                                  loc_readers.address (),
1798                                                  num_locs, &ranges, type);
1799   if (err)
1800     return err;
1801
1802   /* Success: "ranges" should now contain information on the string.  */
1803   return NULL;
1804 }
1805
1806 /* Attempt to populate *OUT_LOC with source location information on the
1807    given characters within the string literal found at STRLOC.
1808    CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1809    character set.
1810
1811    For example, given CARET_IDX = 4, START_IDX = 3, END_IDX  = 7
1812    and string literal "012345\n789"
1813    *OUT_LOC is written to with:
1814      "012345\n789"
1815          ~^~~~~
1816
1817    If CONCATS is non-NULL, then any string literals that the token at
1818    STRLOC was concatenated with are also considered.
1819
1820    This is implemented by re-parsing the relevant source line(s).
1821
1822    Return NULL if successful, or an error message if any errors occurred.
1823    Error messages are intended for GCC developers (to help debugging) rather
1824    than for end-users.  */
1825
1826 const char *
1827 get_location_within_string (cpp_reader *pfile,
1828                             file_cache &fc,
1829                             string_concat_db *concats,
1830                             location_t strloc,
1831                             enum cpp_ttype type,
1832                             int caret_idx, int start_idx, int end_idx,
1833                             location_t *out_loc)
1834 {
1835   gcc_checking_assert (caret_idx >= 0);
1836   gcc_checking_assert (start_idx >= 0);
1837   gcc_checking_assert (end_idx >= 0);
1838   gcc_assert (out_loc);
1839
1840   cpp_substring_ranges ranges;
1841   const char *err
1842     = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
1843   if (err)
1844     return err;
1845
1846   if (caret_idx >= ranges.get_num_ranges ())
1847     return "caret_idx out of range";
1848   if (start_idx >= ranges.get_num_ranges ())
1849     return "start_idx out of range";
1850   if (end_idx >= ranges.get_num_ranges ())
1851     return "end_idx out of range";
1852
1853   *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1854                             ranges.get_range (start_idx).m_start,
1855                             ranges.get_range (end_idx).m_finish);
1856   return NULL;
1857 }
1858
1859 /* Associate the DISCRIMINATOR with LOCUS, and return a new locus. */
1860
1861 location_t
1862 location_with_discriminator (location_t locus, int discriminator)
1863 {
1864   tree block = LOCATION_BLOCK (locus);
1865   source_range src_range = get_range_from_loc (line_table, locus);
1866   locus = get_pure_location (locus);
1867
1868   if (locus == UNKNOWN_LOCATION)
1869     return locus;
1870
1871   return line_table->get_or_create_combined_loc (locus, src_range, block,
1872                                                  discriminator);
1873 }
1874
1875 /* Return TRUE if LOCUS represents a location with a discriminator.  */
1876
1877 bool
1878 has_discriminator (location_t locus)
1879 {
1880   return get_discriminator_from_loc (locus) != 0;
1881 }
1882
1883 /* Return the discriminator for LOCUS.  */
1884
1885 int
1886 get_discriminator_from_loc (location_t locus)
1887 {
1888   return get_discriminator_from_loc (line_table, locus);
1889 }
1890
1891 #if CHECKING_P
1892
1893 namespace selftest {
1894
1895 /* Selftests of location handling.  */
1896
1897 /* Attempt to populate *OUT_RANGE with source location information on the
1898    given character within the string literal found at STRLOC.
1899    CHAR_IDX refers to an offset within the execution character set.
1900    If CONCATS is non-NULL, then any string literals that the token at
1901    STRLOC was concatenated with are also considered.
1902
1903    This is implemented by re-parsing the relevant source line(s).
1904
1905    Return NULL if successful, or an error message if any errors occurred.
1906    Error messages are intended for GCC developers (to help debugging) rather
1907    than for end-users.  */
1908
1909 static const char *
1910 get_source_range_for_char (cpp_reader *pfile,
1911                            file_cache &fc,
1912                            string_concat_db *concats,
1913                            location_t strloc,
1914                            enum cpp_ttype type,
1915                            int char_idx,
1916                            source_range *out_range)
1917 {
1918   gcc_checking_assert (char_idx >= 0);
1919   gcc_assert (out_range);
1920
1921   cpp_substring_ranges ranges;
1922   const char *err
1923     = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
1924   if (err)
1925     return err;
1926
1927   if (char_idx >= ranges.get_num_ranges ())
1928     return "char_idx out of range";
1929
1930   *out_range = ranges.get_range (char_idx);
1931   return NULL;
1932 }
1933
1934 /* As get_source_range_for_char, but write to *OUT the number
1935    of ranges that are available.  */
1936
1937 static const char *
1938 get_num_source_ranges_for_substring (cpp_reader *pfile,
1939                                      file_cache &fc,
1940                                      string_concat_db *concats,
1941                                      location_t strloc,
1942                                      enum cpp_ttype type,
1943                                      int *out)
1944 {
1945   gcc_assert (out);
1946
1947   cpp_substring_ranges ranges;
1948   const char *err
1949     = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
1950
1951   if (err)
1952     return err;
1953
1954   *out = ranges.get_num_ranges ();
1955   return NULL;
1956 }
1957
1958 /* Selftests of location handling.  */
1959
1960 /* Verify that compare() on linenum_type handles comparisons over the full
1961    range of the type.  */
1962
1963 static void
1964 test_linenum_comparisons ()
1965 {
1966   linenum_type min_line (0);
1967   linenum_type max_line (0xffffffff);
1968   ASSERT_EQ (0, compare (min_line, min_line));
1969   ASSERT_EQ (0, compare (max_line, max_line));
1970
1971   ASSERT_GT (compare (max_line, min_line), 0);
1972   ASSERT_LT (compare (min_line, max_line), 0);
1973 }
1974
1975 /* Helper function for verifying location data: when location_t
1976    values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1977    as having column 0.  */
1978
1979 static bool
1980 should_have_column_data_p (location_t loc)
1981 {
1982   if (IS_ADHOC_LOC (loc))
1983     loc = get_location_from_adhoc_loc (line_table, loc);
1984   if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1985     return false;
1986   return true;
1987 }
1988
1989 /* Selftest for should_have_column_data_p.  */
1990
1991 static void
1992 test_should_have_column_data_p ()
1993 {
1994   ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1995   ASSERT_TRUE
1996     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1997   ASSERT_FALSE
1998     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1999 }
2000
2001 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
2002    on LOC.  */
2003
2004 static void
2005 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
2006               location_t loc)
2007 {
2008   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
2009   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
2010   /* If location_t values are sufficiently high, then column numbers
2011      will be unavailable and LOCATION_COLUMN (loc) will be 0.
2012      When close to the threshold, column numbers *may* be present: if
2013      the final linemap before the threshold contains a line that straddles
2014      the threshold, locations in that line have column information.  */
2015   if (should_have_column_data_p (loc))
2016     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
2017 }
2018
/* Many selftests build a line table containing one or more line maps.

   To maximize coverage these tests are run in a variety of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.cc: there are various threshold
     values for location_t beyond which line-map.cc changes behavior
     (disabling of the range-packing optimization, disabling of
     column-tracking).  These are exercised by starting the line_table
     at interesting values at or near these thresholds.

   A line_table_case describes one particular case within the test
   matrix.  */

class line_table_case
{
public:
  /* Value for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* Starting point for the line_table's locations; zero means that
     no explicit starting point is set.  */
  int m_base_location;

  line_table_case (int default_range_bits, int base_location)
  : m_default_range_bits (default_range_bits),
    m_base_location (base_location)
  {
  }
};
2046
2047 /* Constructor.  Store the old value of line_table, and create a new
2048    one, using sane defaults.  */
2049
2050 line_table_test::line_table_test ()
2051 {
2052   gcc_assert (saved_line_table == NULL);
2053   saved_line_table = line_table;
2054   line_table = ggc_alloc<line_maps> ();
2055   linemap_init (line_table, BUILTINS_LOCATION);
2056   gcc_assert (saved_line_table->m_reallocator);
2057   line_table->m_reallocator = saved_line_table->m_reallocator;
2058   gcc_assert (saved_line_table->m_round_alloc_size);
2059   line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
2060   line_table->default_range_bits = 0;
2061 }
2062
2063 /* Constructor.  Store the old value of line_table, and create a new
2064    one, using the sitation described in CASE_.  */
2065
2066 line_table_test::line_table_test (const line_table_case &case_)
2067 {
2068   gcc_assert (saved_line_table == NULL);
2069   saved_line_table = line_table;
2070   line_table = ggc_alloc<line_maps> ();
2071   linemap_init (line_table, BUILTINS_LOCATION);
2072   gcc_assert (saved_line_table->m_reallocator);
2073   line_table->m_reallocator = saved_line_table->m_reallocator;
2074   gcc_assert (saved_line_table->m_round_alloc_size);
2075   line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
2076   line_table->default_range_bits = case_.m_default_range_bits;
2077   if (case_.m_base_location)
2078     {
2079       line_table->highest_location = case_.m_base_location;
2080       line_table->highest_line = case_.m_base_location;
2081     }
2082 }
2083
2084 /* Destructor.  Restore the old value of line_table.  */
2085
2086 line_table_test::~line_table_test ()
2087 {
2088   gcc_assert (saved_line_table != NULL);
2089   line_table = saved_line_table;
2090   saved_line_table = NULL;
2091 }
2092
2093 /* Verify basic operation of ordinary linemaps.  */
2094
2095 static void
2096 test_accessing_ordinary_linemaps (const line_table_case &case_)
2097 {
2098   line_table_test ltt (case_);
2099
2100   /* Build a simple linemap describing some locations. */
2101   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
2102
2103   linemap_line_start (line_table, 1, 100);
2104   location_t loc_a = linemap_position_for_column (line_table, 1);
2105   location_t loc_b = linemap_position_for_column (line_table, 23);
2106
2107   linemap_line_start (line_table, 2, 100);
2108   location_t loc_c = linemap_position_for_column (line_table, 1);
2109   location_t loc_d = linemap_position_for_column (line_table, 17);
2110
2111   /* Example of a very long line.  */
2112   linemap_line_start (line_table, 3, 2000);
2113   location_t loc_e = linemap_position_for_column (line_table, 700);
2114
2115   /* Transitioning back to a short line.  */
2116   linemap_line_start (line_table, 4, 0);
2117   location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
2118
2119   if (should_have_column_data_p (loc_back_to_short))
2120     {
2121       /* Verify that we switched to short lines in the linemap.  */
2122       line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
2123       ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
2124     }
2125
2126   /* Example of a line that will eventually be seen to be longer
2127      than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
2128      below that.  */
2129   linemap_line_start (line_table, 5, 2000);
2130
2131   location_t loc_start_of_very_long_line
2132     = linemap_position_for_column (line_table, 2000);
2133   location_t loc_too_wide
2134     = linemap_position_for_column (line_table, 4097);
2135   location_t loc_too_wide_2
2136     = linemap_position_for_column (line_table, 4098);
2137
2138   /* ...and back to a sane line length.  */
2139   linemap_line_start (line_table, 6, 100);
2140   location_t loc_sane_again = linemap_position_for_column (line_table, 10);
2141
2142   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
2143
2144   /* Multiple files.  */
2145   linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
2146   linemap_line_start (line_table, 1, 200);
2147   location_t loc_f = linemap_position_for_column (line_table, 150);
2148   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
2149
2150   /* Verify that we can recover the location info.  */
2151   assert_loceq ("foo.c", 1, 1, loc_a);
2152   assert_loceq ("foo.c", 1, 23, loc_b);
2153   assert_loceq ("foo.c", 2, 1, loc_c);
2154   assert_loceq ("foo.c", 2, 17, loc_d);
2155   assert_loceq ("foo.c", 3, 700, loc_e);
2156   assert_loceq ("foo.c", 4, 100, loc_back_to_short);
2157
2158   /* In the very wide line, the initial location should be fully tracked.  */
2159   assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
2160   /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
2161      be disabled.  */
2162   assert_loceq ("foo.c", 5, 0, loc_too_wide);
2163   assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
2164   /*...and column-tracking should be re-enabled for subsequent lines.  */
2165   assert_loceq ("foo.c", 6, 10, loc_sane_again);
2166
2167   assert_loceq ("bar.c", 1, 150, loc_f);
2168
2169   ASSERT_FALSE (is_location_from_builtin_token (loc_a));
2170   ASSERT_TRUE (pure_location_p (line_table, loc_a));
2171
2172   /* Verify using make_location to build a range, and extracting data
2173      back from it.  */
2174   location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
2175   ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
2176   ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
2177   source_range src_range = get_range_from_loc (line_table, range_c_b_d);
2178   ASSERT_EQ (loc_b, src_range.m_start);
2179   ASSERT_EQ (loc_d, src_range.m_finish);
2180 }
2181
2182 /* Verify various properties of UNKNOWN_LOCATION.  */
2183
2184 static void
2185 test_unknown_location ()
2186 {
2187   ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
2188   ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
2189   ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
2190 }
2191
2192 /* Verify various properties of BUILTINS_LOCATION.  */
2193
2194 static void
2195 test_builtins ()
2196 {
2197   assert_loceq (special_fname_builtin (), 0, 0, BUILTINS_LOCATION);
2198   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
2199 }
2200
2201 /* Regression test for make_location.
2202    Ensure that we use pure locations for the start/finish of the range,
2203    rather than storing a packed or ad-hoc range as the start/finish.  */
2204
2205 static void
2206 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
2207 {
2208   /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
2209      with C++ frontend.
2210      ....................0000000001111111111222.
2211      ....................1234567890123456789012.  */
2212   const char *content = "     r += !aaa == bbb;\n";
2213   temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
2214   line_table_test ltt (case_);
2215   linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
2216
2217   const location_t c11 = linemap_position_for_column (line_table, 11);
2218   const location_t c12 = linemap_position_for_column (line_table, 12);
2219   const location_t c13 = linemap_position_for_column (line_table, 13);
2220   const location_t c14 = linemap_position_for_column (line_table, 14);
2221   const location_t c21 = linemap_position_for_column (line_table, 21);
2222
2223   if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
2224     return;
2225
2226   /* Use column 13 for the caret location, arbitrarily, to verify that we
2227      handle start != caret.  */
2228   const location_t aaa = make_location (c13, c12, c14);
2229   ASSERT_EQ (c13, get_pure_location (aaa));
2230   ASSERT_EQ (c12, get_start (aaa));
2231   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
2232   ASSERT_EQ (c14, get_finish (aaa));
2233   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
2234
2235   /* Make a location using a location with a range as the start-point.  */
2236   const location_t not_aaa = make_location (c11, aaa, c14);
2237   ASSERT_EQ (c11, get_pure_location (not_aaa));
2238   /* It should use the start location of the range, not store the range
2239      itself.  */
2240   ASSERT_EQ (c12, get_start (not_aaa));
2241   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
2242   ASSERT_EQ (c14, get_finish (not_aaa));
2243   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
2244
2245   /* Similarly, make a location with a range as the end-point.  */
2246   const location_t aaa_eq_bbb = make_location (c12, c12, c21);
2247   ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
2248   ASSERT_EQ (c12, get_start (aaa_eq_bbb));
2249   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
2250   ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
2251   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
2252   const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
2253   /* It should use the finish location of the range, not store the range
2254      itself.  */
2255   ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
2256   ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
2257   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
2258   ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
2259   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
2260 }
2261
2262 /* Verify reading of input files (e.g. for caret-based diagnostics).  */
2263
2264 static void
2265 test_reading_source_line ()
2266 {
2267   /* Create a tempfile and write some text to it.  */
2268   temp_source_file tmp (SELFTEST_LOCATION, ".txt",
2269                         "01234567890123456789\n"
2270                         "This is the test text\n"
2271                         "This is the 3rd line");
2272   file_cache fc;
2273
2274   /* Read back a specific line from the tempfile.  */
2275   char_span source_line = fc.get_source_line (tmp.get_filename (), 3);
2276   ASSERT_TRUE (source_line);
2277   ASSERT_TRUE (source_line.get_buffer () != NULL);
2278   ASSERT_EQ (20, source_line.length ());
2279   ASSERT_TRUE (!strncmp ("This is the 3rd line",
2280                          source_line.get_buffer (), source_line.length ()));
2281
2282   source_line = fc.get_source_line (tmp.get_filename (), 2);
2283   ASSERT_TRUE (source_line);
2284   ASSERT_TRUE (source_line.get_buffer () != NULL);
2285   ASSERT_EQ (21, source_line.length ());
2286   ASSERT_TRUE (!strncmp ("This is the test text",
2287                          source_line.get_buffer (), source_line.length ()));
2288
2289   source_line = fc.get_source_line (tmp.get_filename (), 4);
2290   ASSERT_FALSE (source_line);
2291   ASSERT_TRUE (source_line.get_buffer () == NULL);
2292 }
2293
2294 /* Tests of lexing.  */
2295
/* Check that cpp_token_as_text for token TOK from PARSER gives a
   string equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)     \
  SELFTEST_BEGIN_STMT                                           \
    const char *actual_txt                                      \
      = (const char *)cpp_token_as_text ((PARSER), (TOK));      \
    ASSERT_STREQ ((EXPECTED_TEXT), actual_txt);                 \
  SELFTEST_END_STMT
2304
2305 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
2306    and ranges from EXP_START_COL to EXP_FINISH_COL.
2307    Use LOC as the effective location of the selftest.  */
2308
2309 static void
2310 assert_token_loc_eq (const location &loc,
2311                      const cpp_token *tok,
2312                      const char *exp_filename, int exp_linenum,
2313                      int exp_start_col, int exp_finish_col)
2314 {
2315   location_t tok_loc = tok->src_loc;
2316   ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
2317   ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
2318
2319   /* If location_t values are sufficiently high, then column numbers
2320      will be unavailable.  */
2321   if (!should_have_column_data_p (tok_loc))
2322     return;
2323
2324   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2325   source_range tok_range = get_range_from_loc (line_table, tok_loc);
2326   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2327   ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
2328 }
2329
/* Check TOK->src_loc via assert_token_loc_eq, with SELFTEST_LOCATION
   as the effective location of the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,     \
                            EXP_START_COL, EXP_FINISH_COL)      \
  assert_token_loc_eq (SELFTEST_LOCATION,                       \
                       (TOK),                                   \
                       (EXP_FILENAME), (EXP_LINENUM),           \
                       (EXP_START_COL), (EXP_FINISH_COL))
2337
2338 /* Test of lexing a file using libcpp, verifying tokens and their
2339    location information.  */
2340
2341 static void
2342 test_lexer (const line_table_case &case_)
2343 {
2344   /* Create a tempfile and write some text to it.  */
2345   const char *content =
2346     /*00000000011111111112222222222333333.3333444444444.455555555556
2347       12345678901234567890123456789012345.6789012345678.901234567890.  */
2348     ("test_name /* c-style comment */\n"
2349      "                                  \"test literal\"\n"
2350      " // test c++-style comment\n"
2351      "   42\n");
2352   temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2353
2354   line_table_test ltt (case_);
2355
2356   cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2357
2358   const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2359   ASSERT_NE (fname, NULL);
2360
2361   /* Verify that we get the expected tokens back, with the correct
2362      location information.  */
2363
2364   location_t loc;
2365   const cpp_token *tok;
2366   tok = cpp_get_token_with_location (parser, &loc);
2367   ASSERT_NE (tok, NULL);
2368   ASSERT_EQ (tok->type, CPP_NAME);
2369   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2370   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2371
2372   tok = cpp_get_token_with_location (parser, &loc);
2373   ASSERT_NE (tok, NULL);
2374   ASSERT_EQ (tok->type, CPP_STRING);
2375   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2376   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2377
2378   tok = cpp_get_token_with_location (parser, &loc);
2379   ASSERT_NE (tok, NULL);
2380   ASSERT_EQ (tok->type, CPP_NUMBER);
2381   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2382   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2383
2384   tok = cpp_get_token_with_location (parser, &loc);
2385   ASSERT_NE (tok, NULL);
2386   ASSERT_EQ (tok->type, CPP_EOF);
2387
2388   cpp_finish (parser, NULL);
2389   cpp_destroy (parser);
2390 }
2391
/* Forward declarations.  */

class lexer_test;
class lexer_test_options;

/* Interface for specifying the options of a lexer_test.
   The lexer_test constructor calls the "apply" vfunc, passing itself
   as the argument.  */

class lexer_test_options
{
 public:
  virtual void apply (lexer_test &test) = 0;
};
2405
2406 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2407    in its dtor.
2408
2409    This is needed by struct lexer_test to ensure that the cleanup of the
2410    cpp_reader happens *after* the cleanup of the temp_source_file.  */
2411
2412 class cpp_reader_ptr
2413 {
2414  public:
2415   cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2416
2417   ~cpp_reader_ptr ()
2418   {
2419     cpp_finish (m_ptr, NULL);
2420     cpp_destroy (m_ptr);
2421   }
2422
2423   operator cpp_reader * () const { return m_ptr; }
2424
2425  private:
2426   cpp_reader *m_ptr;
2427 };
2428
2429 /* A struct for writing lexer tests.  */
2430
2431 class lexer_test
2432 {
2433 public:
2434   lexer_test (const line_table_case &case_, const char *content,
2435               lexer_test_options *options);
2436   ~lexer_test ();
2437
2438   const cpp_token *get_token ();
2439
2440   /* The ordering of these fields matters.
2441      The line_table_test must be first, since the cpp_reader_ptr
2442      uses it.
2443      The cpp_reader must be cleaned up *after* the temp_source_file
2444      since the filenames in input.cc's input cache are owned by the
2445      cpp_reader; in particular, when ~temp_source_file evicts the
2446      filename the filenames must still be alive.  */
2447   line_table_test m_ltt;
2448   cpp_reader_ptr m_parser;
2449   temp_source_file m_tempfile;
2450   file_cache m_file_cache;
2451   string_concat_db m_concats;
2452   bool m_implicitly_expect_EOF;
2453 };
2454
2455 /* Use an EBCDIC encoding for the execution charset, specifically
2456    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2457
2458    This exercises iconv integration within libcpp.
2459    Not every build of iconv supports the given charset,
2460    so we need to flag this error and handle it gracefully.  */
2461
2462 class ebcdic_execution_charset : public lexer_test_options
2463 {
2464  public:
2465   ebcdic_execution_charset () : m_num_iconv_errors (0)
2466     {
2467       gcc_assert (s_singleton == NULL);
2468       s_singleton = this;
2469     }
2470   ~ebcdic_execution_charset ()
2471     {
2472       gcc_assert (s_singleton == this);
2473       s_singleton = NULL;
2474     }
2475
2476   void apply (lexer_test &test) final override
2477   {
2478     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2479     cpp_opts->narrow_charset = "IBM1047";
2480
2481     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2482     callbacks->diagnostic = on_diagnostic;
2483   }
2484
2485   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2486                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2487                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2488                              rich_location *richloc ATTRIBUTE_UNUSED,
2489                              const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2490     ATTRIBUTE_FPTR_PRINTF(5,0)
2491   {
2492     gcc_assert (s_singleton);
2493     /* Avoid exgettext from picking this up, it is translated in libcpp.  */
2494     const char *msg = "conversion from %s to %s not supported by iconv";
2495 #ifdef ENABLE_NLS
2496     msg = dgettext ("cpplib", msg);
2497 #endif
2498     /* Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc
2499        when the local iconv build doesn't support the conversion.  */
2500     if (strcmp (msgid, msg) == 0)
2501       {
2502         s_singleton->m_num_iconv_errors++;
2503         return true;
2504       }
2505
2506     /* Otherwise, we have an unexpected error.  */
2507     abort ();
2508   }
2509
2510   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2511
2512  private:
2513   static ebcdic_execution_charset *s_singleton;
2514   int m_num_iconv_errors;
2515 };
2516
2517 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2518
2519 /* A lexer_test_options subclass that records a list of diagnostic
2520    messages emitted by the lexer.  */
2521
2522 class lexer_diagnostic_sink : public lexer_test_options
2523 {
2524  public:
2525   lexer_diagnostic_sink ()
2526   {
2527     gcc_assert (s_singleton == NULL);
2528     s_singleton = this;
2529   }
2530   ~lexer_diagnostic_sink ()
2531   {
2532     gcc_assert (s_singleton == this);
2533     s_singleton = NULL;
2534
2535     int i;
2536     char *str;
2537     FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2538       free (str);
2539   }
2540
2541   void apply (lexer_test &test) final override
2542   {
2543     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2544     callbacks->diagnostic = on_diagnostic;
2545   }
2546
2547   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2548                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2549                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2550                              rich_location *richloc ATTRIBUTE_UNUSED,
2551                              const char *msgid, va_list *ap)
2552     ATTRIBUTE_FPTR_PRINTF(5,0)
2553   {
2554     char *msg = xvasprintf (msgid, *ap);
2555     s_singleton->m_diagnostics.safe_push (msg);
2556     return true;
2557   }
2558
2559   auto_vec<char *> m_diagnostics;
2560
2561  private:
2562   static lexer_diagnostic_sink *s_singleton;
2563 };
2564
2565 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2566
2567 /* Constructor.  Override line_table with a new instance based on CASE_,
2568    and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
2569    start parsing the tempfile.  */
2570
2571 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2572                         lexer_test_options *options)
2573 : m_ltt (case_),
2574   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2575   /* Create a tempfile and write the text to it.  */
2576   m_tempfile (SELFTEST_LOCATION, ".c", content),
2577   m_concats (),
2578   m_implicitly_expect_EOF (true)
2579 {
2580   if (options)
2581     options->apply (*this);
2582
2583   cpp_init_iconv (m_parser);
2584
2585   /* Parse the file.  */
2586   const char *fname = cpp_read_main_file (m_parser,
2587                                           m_tempfile.get_filename ());
2588   ASSERT_NE (fname, NULL);
2589 }
2590
2591 /* Destructor.  By default, verify that the next token in m_parser is EOF.  */
2592
2593 lexer_test::~lexer_test ()
2594 {
2595   location_t loc;
2596   const cpp_token *tok;
2597
2598   if (m_implicitly_expect_EOF)
2599     {
2600       tok = cpp_get_token_with_location (m_parser, &loc);
2601       ASSERT_NE (tok, NULL);
2602       ASSERT_EQ (tok->type, CPP_EOF);
2603     }
2604 }
2605
2606 /* Get the next token from m_parser.  */
2607
2608 const cpp_token *
2609 lexer_test::get_token ()
2610 {
2611   location_t loc;
2612   const cpp_token *tok;
2613
2614   tok = cpp_get_token_with_location (m_parser, &loc);
2615   ASSERT_NE (tok, NULL);
2616   return tok;
2617 }
2618
2619 /* Verify that locations within string literals are correctly handled.  */
2620
2621 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2622    using the string concatenation database for TEST.
2623
2624    Assert that the character at index IDX is on EXPECTED_LINE,
2625    and that it begins at column EXPECTED_START_COL and ends at
2626    EXPECTED_FINISH_COL (unless the locations are beyond
2627    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2628    columns).  */
2629
2630 static void
2631 assert_char_at_range (const location &loc,
2632                       lexer_test& test,
2633                       location_t strloc, enum cpp_ttype type, int idx,
2634                       int expected_line, int expected_start_col,
2635                       int expected_finish_col)
2636 {
2637   cpp_reader *pfile = test.m_parser;
2638   string_concat_db *concats = &test.m_concats;
2639
2640   source_range actual_range = source_range();
2641   const char *err
2642     = get_source_range_for_char (pfile, test.m_file_cache,
2643                                  concats, strloc, type, idx,
2644                                  &actual_range);
2645   if (should_have_column_data_p (strloc))
2646     ASSERT_EQ_AT (loc, NULL, err);
2647   else
2648     {
2649       ASSERT_STREQ_AT (loc,
2650                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2651                        err);
2652       return;
2653     }
2654
2655   int actual_start_line = LOCATION_LINE (actual_range.m_start);
2656   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2657   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2658   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2659
2660   if (should_have_column_data_p (actual_range.m_start))
2661     {
2662       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2663       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2664     }
2665   if (should_have_column_data_p (actual_range.m_finish))
2666     {
2667       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2668       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2669     }
2670 }
2671
2672 /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2673    the effective location of any errors.  */
2674
2675 #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2676                              EXPECTED_START_COL, EXPECTED_FINISH_COL)   \
2677   assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2678                         (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2679                         (EXPECTED_FINISH_COL))
2680
2681 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2682    using the string concatenation database for TEST.
2683
2684    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES.  */
2685
2686 static void
2687 assert_num_substring_ranges (const location &loc,
2688                              lexer_test& test,
2689                              location_t strloc,
2690                              enum cpp_ttype type,
2691                              int expected_num_ranges)
2692 {
2693   cpp_reader *pfile = test.m_parser;
2694   string_concat_db *concats = &test.m_concats;
2695
2696   int actual_num_ranges = -1;
2697   const char *err
2698     = get_num_source_ranges_for_substring (pfile, test.m_file_cache,
2699                                            concats, strloc, type,
2700                                            &actual_num_ranges);
2701   if (should_have_column_data_p (strloc))
2702     ASSERT_EQ_AT (loc, NULL, err);
2703   else
2704     {
2705       ASSERT_STREQ_AT (loc,
2706                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2707                        err);
2708       return;
2709     }
2710   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2711 }
2712
2713 /* Macro for calling assert_num_substring_ranges, supplying
2714    SELFTEST_LOCATION for the effective location of any errors.  */
2715
2716 #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2717                                     EXPECTED_NUM_RANGES)                \
2718   assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2719                                (TYPE), (EXPECTED_NUM_RANGES))
2720
2721
2722 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2723    returns an error (using the string concatenation database for TEST).  */
2724
2725 static void
2726 assert_has_no_substring_ranges (const location &loc,
2727                                 lexer_test& test,
2728                                 location_t strloc,
2729                                 enum cpp_ttype type,
2730                                 const char *expected_err)
2731 {
2732   cpp_reader *pfile = test.m_parser;
2733   string_concat_db *concats = &test.m_concats;
2734   cpp_substring_ranges ranges;
2735   const char *actual_err
2736     = get_substring_ranges_for_loc (pfile, test.m_file_cache, concats, strloc,
2737                                     type, ranges);
2738   if (should_have_column_data_p (strloc))
2739     ASSERT_STREQ_AT (loc, expected_err, actual_err);
2740   else
2741     ASSERT_STREQ_AT (loc,
2742                      "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2743                      actual_err);
2744 }
2745
2746 #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)    \
2747     assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2748                                     (STRLOC), (TYPE), (ERR))
2749
2750 /* Lex a simple string literal.  Verify the substring location data, before
2751    and after running cpp_interpret_string on it.  */
2752
2753 static void
2754 test_lexer_string_locations_simple (const line_table_case &case_)
2755 {
2756   /* Digits 0-9 (with 0 at column 10), the simple way.
2757      ....................000000000.11111111112.2222222223333333333
2758      ....................123456789.01234567890.1234567890123456789
2759      We add a trailing comment to ensure that we correctly locate
2760      the end of the string literal token.  */
2761   const char *content = "        \"0123456789\" /* not a string */\n";
2762   lexer_test test (case_, content, NULL);
2763
2764   /* Verify that we get the expected token back, with the correct
2765      location information.  */
2766   const cpp_token *tok = test.get_token ();
2767   ASSERT_EQ (tok->type, CPP_STRING);
2768   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2769   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2770
2771   /* At this point in lexing, the quote characters are treated as part of
2772      the string (they are stripped off by cpp_interpret_string).  */
2773
2774   ASSERT_EQ (tok->val.str.len, 12);
2775
2776   /* Verify that cpp_interpret_string works.  */
2777   cpp_string dst_string;
2778   const enum cpp_ttype type = CPP_STRING;
2779   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2780                                       &dst_string, type);
2781   ASSERT_TRUE (result);
2782   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2783   free (const_cast <unsigned char *> (dst_string.text));
2784
2785   /* Verify ranges of individual characters.  This no longer includes the
2786      opening quote, but does include the closing quote.  */
2787   for (int i = 0; i <= 10; i++)
2788     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2789                           10 + i, 10 + i);
2790
2791   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2792 }
2793
2794 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2795    encoding.  */
2796
2797 static void
2798 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2799 {
2800   /* EBCDIC support requires iconv.  */
2801   if (!HAVE_ICONV)
2802     return;
2803
2804   /* Digits 0-9 (with 0 at column 10), the simple way.
2805      ....................000000000.11111111112.2222222223333333333
2806      ....................123456789.01234567890.1234567890123456789
2807      We add a trailing comment to ensure that we correctly locate
2808      the end of the string literal token.  */
2809   const char *content = "        \"0123456789\" /* not a string */\n";
2810   ebcdic_execution_charset use_ebcdic;
2811   lexer_test test (case_, content, &use_ebcdic);
2812
2813   /* Verify that we get the expected token back, with the correct
2814      location information.  */
2815   const cpp_token *tok = test.get_token ();
2816   ASSERT_EQ (tok->type, CPP_STRING);
2817   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2818   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2819
2820   /* At this point in lexing, the quote characters are treated as part of
2821      the string (they are stripped off by cpp_interpret_string).  */
2822
2823   ASSERT_EQ (tok->val.str.len, 12);
2824
2825   /* The remainder of the test requires an iconv implementation that
2826      can convert from UTF-8 to the EBCDIC encoding requested above.  */
2827   if (use_ebcdic.iconv_errors_occurred_p ())
2828     return;
2829
2830   /* Verify that cpp_interpret_string works.  */
2831   cpp_string dst_string;
2832   const enum cpp_ttype type = CPP_STRING;
2833   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2834                                       &dst_string, type);
2835   ASSERT_TRUE (result);
2836   /* We should now have EBCDIC-encoded text, specifically
2837      IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2838      The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
2839   ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2840                 (const char *)dst_string.text);
2841   free (const_cast <unsigned char *> (dst_string.text));
2842
2843   /* Verify that we don't attempt to record substring location information
2844      for such cases.  */
2845   ASSERT_HAS_NO_SUBSTRING_RANGES
2846     (test, tok->src_loc, type,
2847      "execution character set != source character set");
2848 }
2849
2850 /* Lex a string literal containing a hex-escaped character.
2851    Verify the substring location data, before and after running
2852    cpp_interpret_string on it.  */
2853
2854 static void
2855 test_lexer_string_locations_hex (const line_table_case &case_)
2856 {
2857   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2858      and with a space in place of digit 6, to terminate the escaped
2859      hex code.
2860      ....................000000000.111111.11112222.
2861      ....................123456789.012345.67890123.  */
2862   const char *content = "        \"01234\\x35 789\"\n";
2863   lexer_test test (case_, content, NULL);
2864
2865   /* Verify that we get the expected token back, with the correct
2866      location information.  */
2867   const cpp_token *tok = test.get_token ();
2868   ASSERT_EQ (tok->type, CPP_STRING);
2869   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2870   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2871
2872   /* At this point in lexing, the quote characters are treated as part of
2873      the string (they are stripped off by cpp_interpret_string).  */
2874   ASSERT_EQ (tok->val.str.len, 15);
2875
2876   /* Verify that cpp_interpret_string works.  */
2877   cpp_string dst_string;
2878   const enum cpp_ttype type = CPP_STRING;
2879   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2880                                       &dst_string, type);
2881   ASSERT_TRUE (result);
2882   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2883   free (const_cast <unsigned char *> (dst_string.text));
2884
2885   /* Verify ranges of individual characters.  This no longer includes the
2886      opening quote, but does include the closing quote.  */
2887   for (int i = 0; i <= 4; i++)
2888     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2889   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2890   for (int i = 6; i <= 10; i++)
2891     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2892
2893   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2894 }
2895
2896 /* Lex a string literal containing an octal-escaped character.
2897    Verify the substring location data after running cpp_interpret_string
2898    on it.  */
2899
2900 static void
2901 test_lexer_string_locations_oct (const line_table_case &case_)
2902 {
2903   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2904      and with a space in place of digit 6, to terminate the escaped
2905      octal code.
2906      ....................000000000.111111.11112222.2222223333333333444
2907      ....................123456789.012345.67890123.4567890123456789012  */
2908   const char *content = "        \"01234\\065 789\" /* not a string */\n";
2909   lexer_test test (case_, content, NULL);
2910
2911   /* Verify that we get the expected token back, with the correct
2912      location information.  */
2913   const cpp_token *tok = test.get_token ();
2914   ASSERT_EQ (tok->type, CPP_STRING);
2915   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2916
2917   /* Verify that cpp_interpret_string works.  */
2918   cpp_string dst_string;
2919   const enum cpp_ttype type = CPP_STRING;
2920   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2921                                       &dst_string, type);
2922   ASSERT_TRUE (result);
2923   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2924   free (const_cast <unsigned char *> (dst_string.text));
2925
2926   /* Verify ranges of individual characters.  This no longer includes the
2927      opening quote, but does include the closing quote.  */
2928   for (int i = 0; i < 5; i++)
2929     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2930   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2931   for (int i = 6; i <= 10; i++)
2932     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2933
2934   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2935 }
2936
2937 /* Test of string literal containing letter escapes.  */
2938
2939 static void
2940 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2941 {
2942   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2943      .....................000000000.1.11111.1.1.11222.22222223333333
2944      .....................123456789.0.12345.6.7.89012.34567890123456.  */
2945   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2946   lexer_test test (case_, content, NULL);
2947
2948   /* Verify that we get the expected tokens back.  */
2949   const cpp_token *tok = test.get_token ();
2950   ASSERT_EQ (tok->type, CPP_STRING);
2951   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2952
2953   /* Verify ranges of individual characters. */
2954   /* "\t".  */
2955   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2956                         0, 1, 10, 11);
2957   /* "foo". */
2958   for (int i = 1; i <= 3; i++)
2959     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2960                           i, 1, 11 + i, 11 + i);
2961   /* "\\" and "\n".  */
2962   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2963                         4, 1, 15, 16);
2964   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2965                         5, 1, 17, 18);
2966
2967   /* "bar" and closing quote for nul-terminator.  */
2968   for (int i = 6; i <= 9; i++)
2969     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2970                           i, 1, 13 + i, 13 + i);
2971
2972   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2973 }
2974
2975 /* Another test of a string literal containing a letter escape.
2976    Based on string seen in
2977      printf ("%-%\n");
2978    in gcc.dg/format/c90-printf-1.c.  */
2979
2980 static void
2981 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2982 {
2983   /* .....................000000000.1111.11.1111.22222222223.
2984      .....................123456789.0123.45.6789.01234567890.  */
2985   const char *content = ("        \"%-%\\n\" /* non-str */\n");
2986   lexer_test test (case_, content, NULL);
2987
2988   /* Verify that we get the expected tokens back.  */
2989   const cpp_token *tok = test.get_token ();
2990   ASSERT_EQ (tok->type, CPP_STRING);
2991   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2992
2993   /* Verify ranges of individual characters. */
2994   /* "%-%".  */
2995   for (int i = 0; i < 3; i++)
2996     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2997                           i, 1, 10 + i, 10 + i);
2998   /* "\n".  */
2999   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3000                         3, 1, 13, 14);
3001
3002   /* Closing quote for nul-terminator.  */
3003   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3004                         4, 1, 15, 15);
3005
3006   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
3007 }
3008
3009 /* Lex a string literal containing UCN 4 characters.
3010    Verify the substring location data after running cpp_interpret_string
3011    on it.  */
3012
3013 static void
3014 test_lexer_string_locations_ucn4 (const line_table_case &case_)
3015 {
3016   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
3017      as UCN 4.
3018      ....................000000000.111111.111122.222222223.33333333344444
3019      ....................123456789.012345.678901.234567890.12345678901234  */
3020   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
3021   lexer_test test (case_, content, NULL);
3022
3023   /* Verify that we get the expected token back, with the correct
3024      location information.  */
3025   const cpp_token *tok = test.get_token ();
3026   ASSERT_EQ (tok->type, CPP_STRING);
3027   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
3028
3029   /* Verify that cpp_interpret_string works.
3030      The string should be encoded in the execution character
3031      set.  Assuming that is UTF-8, we should have the following:
3032      -----------  ----  -----  -------  ----------------
3033      Byte offset  Byte  Octal  Unicode  Source Column(s)
3034      -----------  ----  -----  -------  ----------------
3035      0            0x30         '0'      10
3036      1            0x31         '1'      11
3037      2            0x32         '2'      12
3038      3            0x33         '3'      13
3039      4            0x34         '4'      14
3040      5            0xE2  \342   U+2174   15-20
3041      6            0x85  \205    (cont)  15-20
3042      7            0xB4  \264    (cont)  15-20
3043      8            0xE2  \342   U+2175   21-26
3044      9            0x85  \205    (cont)  21-26
3045      10           0xB5  \265    (cont)  21-26
3046      11           0x37         '7'      27
3047      12           0x38         '8'      28
3048      13           0x39         '9'      29
3049      14           0x00                  30 (closing quote)
3050      -----------  ----  -----  -------  ---------------.  */
3051
3052   cpp_string dst_string;
3053   const enum cpp_ttype type = CPP_STRING;
3054   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3055                                       &dst_string, type);
3056   ASSERT_TRUE (result);
3057   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3058                 (const char *)dst_string.text);
3059   free (const_cast <unsigned char *> (dst_string.text));
3060
3061   /* Verify ranges of individual characters.  This no longer includes the
3062      opening quote, but does include the closing quote.
3063      '01234'.  */
3064   for (int i = 0; i <= 4; i++)
3065     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3066   /* U+2174.  */
3067   for (int i = 5; i <= 7; i++)
3068     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
3069   /* U+2175.  */
3070   for (int i = 8; i <= 10; i++)
3071     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
3072   /* '789' and nul terminator  */
3073   for (int i = 11; i <= 14; i++)
3074     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
3075
3076   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
3077 }
3078
3079 /* Lex a string literal containing UCN 8 characters.
3080    Verify the substring location data after running cpp_interpret_string
3081    on it.  */
3082
3083 static void
3084 test_lexer_string_locations_ucn8 (const line_table_case &case_)
3085 {
3086   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
3087      ....................000000000.111111.1111222222.2222333333333.344444
3088      ....................123456789.012345.6789012345.6789012345678.901234  */
3089   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
3090   lexer_test test (case_, content, NULL);
3091
3092   /* Verify that we get the expected token back, with the correct
3093      location information.  */
3094   const cpp_token *tok = test.get_token ();
3095   ASSERT_EQ (tok->type, CPP_STRING);
3096   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
3097                            "\"01234\\U00002174\\U00002175789\"");
3098
3099   /* Verify that cpp_interpret_string works.
3100      The UTF-8 encoding of the string is identical to that from
3101      the ucn4 testcase above; the only difference is the column
3102      locations.  */
3103   cpp_string dst_string;
3104   const enum cpp_ttype type = CPP_STRING;
3105   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3106                                       &dst_string, type);
3107   ASSERT_TRUE (result);
3108   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3109                 (const char *)dst_string.text);
3110   free (const_cast <unsigned char *> (dst_string.text));
3111
3112   /* Verify ranges of individual characters.  This no longer includes the
3113      opening quote, but does include the closing quote.
3114      '01234'.  */
3115   for (int i = 0; i <= 4; i++)
3116     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3117   /* U+2174.  */
3118   for (int i = 5; i <= 7; i++)
3119     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
3120   /* U+2175.  */
3121   for (int i = 8; i <= 10; i++)
3122     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
3123   /* '789' at columns 35-37  */
3124   for (int i = 11; i <= 13; i++)
3125     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
3126   /* Closing quote/nul-terminator at column 38.  */
3127   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
3128
3129   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
3130 }
3131
/* Read the four bytes at PTR_BE_VALUE as a big-endian 32-bit value and
   return it in host byte order.  The bytes are read one at a time, so
   the result does not depend on the host's endianness.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes
    = reinterpret_cast <const unsigned char *> (ptr_be_value);

  /* Most significant byte first: shift in one byte per iteration.  */
  uint32_t host_value = 0;
  for (int byte_idx = 0; byte_idx < 4; byte_idx++)
    host_value = (host_value << 8) | (uint32_t) bytes[byte_idx];
  return host_value;
}
3143
3144 /* Lex a wide string literal and verify that attempts to read substring
3145    location data from it fail gracefully.  */
3146
3147 static void
3148 test_lexer_string_locations_wide_string (const line_table_case &case_)
3149 {
3150   /* Digits 0-9.
3151      ....................000000000.11111111112.22222222233333
3152      ....................123456789.01234567890.12345678901234  */
3153   const char *content = "       L\"0123456789\" /* non-str */\n";
3154   lexer_test test (case_, content, NULL);
3155
3156   /* Verify that we get the expected token back, with the correct
3157      location information.  */
3158   const cpp_token *tok = test.get_token ();
3159   ASSERT_EQ (tok->type, CPP_WSTRING);
3160   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
3161
3162   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
3163   cpp_string dst_string;
3164   const enum cpp_ttype type = CPP_WSTRING;
3165   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3166                                       &dst_string, type);
3167   ASSERT_TRUE (result);
3168   /* The cpp_reader defaults to big-endian with
3169      CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
3170      now be encoded as UTF-32BE.  */
3171   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3172   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3173   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3174   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3175   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3176   free (const_cast <unsigned char *> (dst_string.text));
3177
3178   /* We don't yet support generating substring location information
3179      for L"" strings.  */
3180   ASSERT_HAS_NO_SUBSTRING_RANGES
3181     (test, tok->src_loc, type,
3182      "execution character set != source character set");
3183 }
3184
/* Read the two bytes at PTR_BE_VALUE as a big-endian 16-bit value and
   return it in host byte order.  The bytes are read one at a time, so
   the result does not depend on the host's endianness.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes
    = reinterpret_cast <const unsigned char *> (ptr_be_value);

  /* The first byte is the most significant one.  */
  const unsigned int high_byte = bytes[0];
  const unsigned int low_byte = bytes[1];
  return static_cast <uint16_t> ((high_byte << 8) | low_byte);
}
3193
3194 /* Lex a u"" string literal and verify that attempts to read substring
3195    location data from it fail gracefully.  */
3196
3197 static void
3198 test_lexer_string_locations_string16 (const line_table_case &case_)
3199 {
3200   /* Digits 0-9.
3201      ....................000000000.11111111112.22222222233333
3202      ....................123456789.01234567890.12345678901234  */
3203   const char *content = "       u\"0123456789\" /* non-str */\n";
3204   lexer_test test (case_, content, NULL);
3205
3206   /* Verify that we get the expected token back, with the correct
3207      location information.  */
3208   const cpp_token *tok = test.get_token ();
3209   ASSERT_EQ (tok->type, CPP_STRING16);
3210   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
3211
3212   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
3213   cpp_string dst_string;
3214   const enum cpp_ttype type = CPP_STRING16;
3215   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3216                                       &dst_string, type);
3217   ASSERT_TRUE (result);
3218
3219   /* The cpp_reader defaults to big-endian, so dst_string should
3220      now be encoded as UTF-16BE.  */
3221   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
3222   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
3223   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
3224   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
3225   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
3226   free (const_cast <unsigned char *> (dst_string.text));
3227
3228   /* We don't yet support generating substring location information
3229      for L"" strings.  */
3230   ASSERT_HAS_NO_SUBSTRING_RANGES
3231     (test, tok->src_loc, type,
3232      "execution character set != source character set");
3233 }
3234
3235 /* Lex a U"" string literal and verify that attempts to read substring
3236    location data from it fail gracefully.  */
3237
3238 static void
3239 test_lexer_string_locations_string32 (const line_table_case &case_)
3240 {
3241   /* Digits 0-9.
3242      ....................000000000.11111111112.22222222233333
3243      ....................123456789.01234567890.12345678901234  */
3244   const char *content = "       U\"0123456789\" /* non-str */\n";
3245   lexer_test test (case_, content, NULL);
3246
3247   /* Verify that we get the expected token back, with the correct
3248      location information.  */
3249   const cpp_token *tok = test.get_token ();
3250   ASSERT_EQ (tok->type, CPP_STRING32);
3251   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
3252
3253   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
3254   cpp_string dst_string;
3255   const enum cpp_ttype type = CPP_STRING32;
3256   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3257                                       &dst_string, type);
3258   ASSERT_TRUE (result);
3259
3260   /* The cpp_reader defaults to big-endian, so dst_string should
3261      now be encoded as UTF-32BE.  */
3262   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3263   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3264   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3265   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3266   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3267   free (const_cast <unsigned char *> (dst_string.text));
3268
3269   /* We don't yet support generating substring location information
3270      for L"" strings.  */
3271   ASSERT_HAS_NO_SUBSTRING_RANGES
3272     (test, tok->src_loc, type,
3273      "execution character set != source character set");
3274 }
3275
3276 /* Lex a u8-string literal.
3277    Verify the substring location data after running cpp_interpret_string
3278    on it.  */
3279
3280 static void
3281 test_lexer_string_locations_u8 (const line_table_case &case_)
3282 {
3283   /* Digits 0-9.
3284      ....................000000000.11111111112.22222222233333
3285      ....................123456789.01234567890.12345678901234  */
3286   const char *content = "      u8\"0123456789\" /* non-str */\n";
3287   lexer_test test (case_, content, NULL);
3288
3289   /* Verify that we get the expected token back, with the correct
3290      location information.  */
3291   const cpp_token *tok = test.get_token ();
3292   ASSERT_EQ (tok->type, CPP_UTF8STRING);
3293   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
3294
3295   /* Verify that cpp_interpret_string works.  */
3296   cpp_string dst_string;
3297   const enum cpp_ttype type = CPP_STRING;
3298   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3299                                       &dst_string, type);
3300   ASSERT_TRUE (result);
3301   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3302   free (const_cast <unsigned char *> (dst_string.text));
3303
3304   /* Verify ranges of individual characters.  This no longer includes the
3305      opening quote, but does include the closing quote.  */
3306   for (int i = 0; i <= 10; i++)
3307     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3308 }
3309
3310 /* Lex a string literal containing UTF-8 source characters.
3311    Verify the substring location data after running cpp_interpret_string
3312    on it.  */
3313
3314 static void
3315 test_lexer_string_locations_utf8_source (const line_table_case &case_)
3316 {
3317  /* This string literal is written out to the source file as UTF-8,
3318     and is of the form "before mojibake after", where "mojibake"
3319     is written as the following four unicode code points:
3320        U+6587 CJK UNIFIED IDEOGRAPH-6587
3321        U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3322        U+5316 CJK UNIFIED IDEOGRAPH-5316
3323        U+3051 HIRAGANA LETTER KE.
3324      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3325      "before" and "after" are 1 byte per unicode character.
3326
3327      The numbering shown are "columns", which are *byte* numbers within
3328      the line, rather than unicode character numbers.
3329
3330      .................... 000000000.1111111.
3331      .................... 123456789.0123456.  */
3332   const char *content = ("        \"before "
3333                          /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3334                               UTF-8: 0xE6 0x96 0x87
3335                               C octal escaped UTF-8: \346\226\207
3336                             "column" numbers: 17-19.  */
3337                          "\346\226\207"
3338
3339                          /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3340                               UTF-8: 0xE5 0xAD 0x97
3341                               C octal escaped UTF-8: \345\255\227
3342                             "column" numbers: 20-22.  */
3343                          "\345\255\227"
3344
3345                          /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3346                               UTF-8: 0xE5 0x8C 0x96
3347                               C octal escaped UTF-8: \345\214\226
3348                             "column" numbers: 23-25.  */
3349                          "\345\214\226"
3350
3351                          /* U+3051 HIRAGANA LETTER KE
3352                               UTF-8: 0xE3 0x81 0x91
3353                               C octal escaped UTF-8: \343\201\221
3354                             "column" numbers: 26-28.  */
3355                          "\343\201\221"
3356
3357                          /* column numbers 29 onwards
3358                           2333333.33334444444444
3359                           9012345.67890123456789. */
3360                          " after\" /* non-str */\n");
3361   lexer_test test (case_, content, NULL);
3362
3363   /* Verify that we get the expected token back, with the correct
3364      location information.  */
3365   const cpp_token *tok = test.get_token ();
3366   ASSERT_EQ (tok->type, CPP_STRING);
3367   ASSERT_TOKEN_AS_TEXT_EQ
3368     (test.m_parser, tok,
3369      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3370
3371   /* Verify that cpp_interpret_string works.  */
3372   cpp_string dst_string;
3373   const enum cpp_ttype type = CPP_STRING;
3374   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3375                                       &dst_string, type);
3376   ASSERT_TRUE (result);
3377   ASSERT_STREQ
3378     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3379      (const char *)dst_string.text);
3380   free (const_cast <unsigned char *> (dst_string.text));
3381
3382   /* Verify ranges of individual characters.  This no longer includes the
3383      opening quote, but does include the closing quote.
3384      Assuming that both source and execution encodings are UTF-8, we have
3385      a run of 25 octets in each, plus the NUL terminator.  */
3386   for (int i = 0; i < 25; i++)
3387     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3388   /* NUL-terminator should use the closing quote at column 35.  */
3389   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3390
3391   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3392 }
3393
3394 /* Test of string literal concatenation.  */
3395
3396 static void
3397 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3398 {
3399   /* Digits 0-9.
3400      .....................000000000.111111.11112222222222
3401      .....................123456789.012345.67890123456789.  */
3402   const char *content = ("        \"01234\" /* non-str */\n"
3403                          "        \"56789\" /* non-str */\n");
3404   lexer_test test (case_, content, NULL);
3405
3406   location_t input_locs[2];
3407
3408   /* Verify that we get the expected tokens back.  */
3409   auto_vec <cpp_string> input_strings;
3410   const cpp_token *tok_a = test.get_token ();
3411   ASSERT_EQ (tok_a->type, CPP_STRING);
3412   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3413   input_strings.safe_push (tok_a->val.str);
3414   input_locs[0] = tok_a->src_loc;
3415
3416   const cpp_token *tok_b = test.get_token ();
3417   ASSERT_EQ (tok_b->type, CPP_STRING);
3418   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3419   input_strings.safe_push (tok_b->val.str);
3420   input_locs[1] = tok_b->src_loc;
3421
3422   /* Verify that cpp_interpret_string works.  */
3423   cpp_string dst_string;
3424   const enum cpp_ttype type = CPP_STRING;
3425   bool result = cpp_interpret_string (test.m_parser,
3426                                       input_strings.address (), 2,
3427                                       &dst_string, type);
3428   ASSERT_TRUE (result);
3429   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3430   free (const_cast <unsigned char *> (dst_string.text));
3431
3432   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
3433   test.m_concats.record_string_concatenation (2, input_locs);
3434
3435   location_t initial_loc = input_locs[0];
3436
3437   /* "01234" on line 1.  */
3438   for (int i = 0; i <= 4; i++)
3439     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3440   /* "56789" in line 2, plus its closing quote for the nul terminator.  */
3441   for (int i = 5; i <= 10; i++)
3442     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3443
3444   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3445 }
3446
3447 /* Another test of string literal concatenation.  */
3448
3449 static void
3450 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3451 {
3452   /* Digits 0-9.
3453      .....................000000000.111.11111112222222
3454      .....................123456789.012.34567890123456.  */
3455   const char *content = ("        \"01\" /* non-str */\n"
3456                          "        \"23\" /* non-str */\n"
3457                          "        \"45\" /* non-str */\n"
3458                          "        \"67\" /* non-str */\n"
3459                          "        \"89\" /* non-str */\n");
3460   lexer_test test (case_, content, NULL);
3461
3462   auto_vec <cpp_string> input_strings;
3463   location_t input_locs[5];
3464
3465   /* Verify that we get the expected tokens back.  */
3466   for (int i = 0; i < 5; i++)
3467     {
3468       const cpp_token *tok = test.get_token ();
3469       ASSERT_EQ (tok->type, CPP_STRING);
3470       input_strings.safe_push (tok->val.str);
3471       input_locs[i] = tok->src_loc;
3472     }
3473
3474   /* Verify that cpp_interpret_string works.  */
3475   cpp_string dst_string;
3476   const enum cpp_ttype type = CPP_STRING;
3477   bool result = cpp_interpret_string (test.m_parser,
3478                                       input_strings.address (), 5,
3479                                       &dst_string, type);
3480   ASSERT_TRUE (result);
3481   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3482   free (const_cast <unsigned char *> (dst_string.text));
3483
3484   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
3485   test.m_concats.record_string_concatenation (5, input_locs);
3486
3487   location_t initial_loc = input_locs[0];
3488
3489   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3490      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3491      and expect get_source_range_for_substring to fail.
3492      However, for a string concatenation test, we can have a case
3493      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3494      but subsequent strings can be after it.
3495      Attempting to detect this within assert_char_at_range
3496      would overcomplicate the logic for the common test cases, so
3497      we detect it here.  */
3498   if (should_have_column_data_p (input_locs[0])
3499       && !should_have_column_data_p (input_locs[4]))
3500     {
3501       /* Verify that get_source_range_for_substring gracefully rejects
3502          this case.  */
3503       source_range actual_range;
3504       const char *err
3505         = get_source_range_for_char (test.m_parser, test.m_file_cache,
3506                                      &test.m_concats,
3507                                      initial_loc, type, 0, &actual_range);
3508       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3509       return;
3510     }
3511
3512   for (int i = 0; i < 5; i++)
3513     for (int j = 0; j < 2; j++)
3514       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3515                             i + 1, 10 + j, 10 + j);
3516
3517   /* NUL-terminator should use the final closing quote at line 5 column 12.  */
3518   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3519
3520   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3521 }
3522
3523 /* Another test of string literal concatenation, this time combined with
3524    various kinds of escaped characters.  */
3525
3526 static void
3527 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3528 {
3529   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3530      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
3531   const char *content
3532     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3533        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3534     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
3535   lexer_test test (case_, content, NULL);
3536
3537   auto_vec <cpp_string> input_strings;
3538   location_t input_locs[4];
3539
3540   /* Verify that we get the expected tokens back.  */
3541   for (int i = 0; i < 4; i++)
3542     {
3543       const cpp_token *tok = test.get_token ();
3544       ASSERT_EQ (tok->type, CPP_STRING);
3545       input_strings.safe_push (tok->val.str);
3546       input_locs[i] = tok->src_loc;
3547     }
3548
3549   /* Verify that cpp_interpret_string works.  */
3550   cpp_string dst_string;
3551   const enum cpp_ttype type = CPP_STRING;
3552   bool result = cpp_interpret_string (test.m_parser,
3553                                       input_strings.address (), 4,
3554                                       &dst_string, type);
3555   ASSERT_TRUE (result);
3556   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3557   free (const_cast <unsigned char *> (dst_string.text));
3558
3559   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
3560   test.m_concats.record_string_concatenation (4, input_locs);
3561
3562   location_t initial_loc = input_locs[0];
3563
3564   for (int i = 0; i <= 4; i++)
3565     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3566   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3567   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3568   for (int i = 7; i <= 9; i++)
3569     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3570
3571   /* NUL-terminator should use the location of the final closing quote.  */
3572   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3573
3574   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3575 }
3576
3577 /* Test of string literal in a macro.  */
3578
3579 static void
3580 test_lexer_string_locations_macro (const line_table_case &case_)
3581 {
3582   /* Digits 0-9.
3583      .....................0000000001111111111.22222222223.
3584      .....................1234567890123456789.01234567890.  */
3585   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
3586                          "  MACRO");
3587   lexer_test test (case_, content, NULL);
3588
3589   /* Verify that we get the expected tokens back.  */
3590   const cpp_token *tok = test.get_token ();
3591   ASSERT_EQ (tok->type, CPP_PADDING);
3592
3593   tok = test.get_token ();
3594   ASSERT_EQ (tok->type, CPP_STRING);
3595   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3596
3597   /* Verify ranges of individual characters.  We ought to
3598      see columns within the macro definition.  */
3599   for (int i = 0; i <= 10; i++)
3600     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3601                           i, 1, 20 + i, 20 + i);
3602
3603   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3604
3605   tok = test.get_token ();
3606   ASSERT_EQ (tok->type, CPP_PADDING);
3607 }
3608
3609 /* Test of stringification of a macro argument.  */
3610
3611 static void
3612 test_lexer_string_locations_stringified_macro_argument
3613   (const line_table_case &case_)
3614 {
3615   /* .....................000000000111111111122222222223.
3616      .....................123456789012345678901234567890.  */
3617   const char *content = ("#define MACRO(X) #X /* non-str */\n"
3618                          "MACRO(foo)\n");
3619   lexer_test test (case_, content, NULL);
3620
3621   /* Verify that we get the expected token back.  */
3622   const cpp_token *tok = test.get_token ();
3623   ASSERT_EQ (tok->type, CPP_PADDING);
3624
3625   tok = test.get_token ();
3626   ASSERT_EQ (tok->type, CPP_STRING);
3627   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3628
3629   /* We don't support getting the location of a stringified macro
3630      argument.  Verify that it fails gracefully.  */
3631   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3632                                   "cpp_interpret_string_1 failed");
3633
3634   tok = test.get_token ();
3635   ASSERT_EQ (tok->type, CPP_PADDING);
3636
3637   tok = test.get_token ();
3638   ASSERT_EQ (tok->type, CPP_PADDING);
3639 }
3640
3641 /* Ensure that we are fail gracefully if something attempts to pass
3642    in a location that isn't a string literal token.  Seen on this code:
3643
3644      const char a[] = " %d ";
3645      __builtin_printf (a, 0.5);
3646                        ^
3647
3648    when c-format.cc erroneously used the indicated one-character
3649    location as the format string location, leading to a read past the
3650    end of a string buffer in cpp_interpret_string_1.  */
3651
3652 static void
3653 test_lexer_string_locations_non_string (const line_table_case &case_)
3654 {
3655   /* .....................000000000111111111122222222223.
3656      .....................123456789012345678901234567890.  */
3657   const char *content = ("         a\n");
3658   lexer_test test (case_, content, NULL);
3659
3660   /* Verify that we get the expected token back.  */
3661   const cpp_token *tok = test.get_token ();
3662   ASSERT_EQ (tok->type, CPP_NAME);
3663   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3664
3665   /* At this point, libcpp is attempting to interpret the name as a
3666      string literal, despite it not starting with a quote.  We don't detect
3667      that, but we should at least fail gracefully.  */
3668   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3669                                   "cpp_interpret_string_1 failed");
3670 }
3671
3672 /* Ensure that we can read substring information for a token which
3673    starts in one linemap and ends in another .  Adapted from
3674    gcc.dg/cpp/pr69985.c.  */
3675
3676 static void
3677 test_lexer_string_locations_long_line (const line_table_case &case_)
3678 {
3679   /* .....................000000.000111111111
3680      .....................123456.789012346789.  */
3681   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
3682                          "     \"0123456789012345678901234567890123456789"
3683                          "0123456789012345678901234567890123456789"
3684                          "0123456789012345678901234567890123456789"
3685                          "0123456789\"\n");
3686
3687   lexer_test test (case_, content, NULL);
3688
3689   /* Verify that we get the expected token back.  */
3690   const cpp_token *tok = test.get_token ();
3691   ASSERT_EQ (tok->type, CPP_STRING);
3692
3693   if (!should_have_column_data_p (line_table->highest_location))
3694     return;
3695
3696   /* Verify ranges of individual characters.  */
3697   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3698   for (int i = 0; i < 131; i++)
3699     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3700                           i, 2, 7 + i, 7 + i);
3701 }
3702
3703 /* Test of locations within a raw string that doesn't contain a newline.  */
3704
3705 static void
3706 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3707 {
3708   /* .....................00.0000000111111111122.
3709      .....................12.3456789012345678901.  */
3710   const char *content = ("R\"foo(0123456789)foo\"\n");
3711   lexer_test test (case_, content, NULL);
3712
3713   /* Verify that we get the expected token back.  */
3714   const cpp_token *tok = test.get_token ();
3715   ASSERT_EQ (tok->type, CPP_STRING);
3716
3717   /* Verify that cpp_interpret_string works.  */
3718   cpp_string dst_string;
3719   const enum cpp_ttype type = CPP_STRING;
3720   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3721                                       &dst_string, type);
3722   ASSERT_TRUE (result);
3723   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3724   free (const_cast <unsigned char *> (dst_string.text));
3725
3726   if (!should_have_column_data_p (line_table->highest_location))
3727     return;
3728
3729   /* 0-9, plus the nil terminator.  */
3730   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3731   for (int i = 0; i < 11; i++)
3732     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3733                           i, 1, 7 + i, 7 + i);
3734 }
3735
3736 /* Test of locations within a raw string that contains a newline.  */
3737
3738 static void
3739 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3740 {
3741   /* .....................00.0000.
3742      .....................12.3456.  */
3743   const char *content = ("R\"foo(\n"
3744   /* .....................00000.
3745      .....................12345.  */
3746                          "hello\n"
3747                          "world\n"
3748   /* .....................00000.
3749      .....................12345.  */
3750                          ")foo\"\n");
3751   lexer_test test (case_, content, NULL);
3752
3753   /* Verify that we get the expected token back.  */
3754   const cpp_token *tok = test.get_token ();
3755   ASSERT_EQ (tok->type, CPP_STRING);
3756
3757   /* Verify that cpp_interpret_string works.  */
3758   cpp_string dst_string;
3759   const enum cpp_ttype type = CPP_STRING;
3760   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3761                                       &dst_string, type);
3762   ASSERT_TRUE (result);
3763   ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3764   free (const_cast <unsigned char *> (dst_string.text));
3765
3766   if (!should_have_column_data_p (line_table->highest_location))
3767     return;
3768
3769   /* Currently we don't support locations within raw strings that
3770      contain newlines.  */
3771   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3772                                   "range endpoints are on different lines");
3773 }
3774
3775 /* Test of parsing an unterminated raw string.  */
3776
3777 static void
3778 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3779 {
3780   const char *content = "R\"ouch()ouCh\" /* etc */";
3781
3782   lexer_diagnostic_sink diagnostics;
3783   lexer_test test (case_, content, &diagnostics);
3784   test.m_implicitly_expect_EOF = false;
3785
3786   /* Attempt to parse the raw string.  */
3787   const cpp_token *tok = test.get_token ();
3788   ASSERT_EQ (tok->type, CPP_EOF);
3789
3790   ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3791   /* We expect the message "unterminated raw string"
3792      in the "cpplib" translation domain.
3793      It's not clear that dgettext is available on all supported hosts,
3794      so this assertion is commented-out for now.
3795        ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3796                      diagnostics.m_diagnostics[0]);
3797   */
3798 }
3799
3800 /* Test of lexing char constants.  */
3801
3802 static void
3803 test_lexer_char_constants (const line_table_case &case_)
3804 {
3805   /* Various char constants.
3806      .....................0000000001111111111.22222222223.
3807      .....................1234567890123456789.01234567890.  */
3808   const char *content = ("         'a'\n"
3809                          "        u'a'\n"
3810                          "        U'a'\n"
3811                          "        L'a'\n"
3812                          "         'abc'\n");
3813   lexer_test test (case_, content, NULL);
3814
3815   /* Verify that we get the expected tokens back.  */
3816   /* 'a'.  */
3817   const cpp_token *tok = test.get_token ();
3818   ASSERT_EQ (tok->type, CPP_CHAR);
3819   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3820
3821   unsigned int chars_seen;
3822   int unsignedp;
3823   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3824                                           &chars_seen, &unsignedp);
3825   ASSERT_EQ (cc, 'a');
3826   ASSERT_EQ (chars_seen, 1);
3827
3828   /* u'a'.  */
3829   tok = test.get_token ();
3830   ASSERT_EQ (tok->type, CPP_CHAR16);
3831   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3832
3833   /* U'a'.  */
3834   tok = test.get_token ();
3835   ASSERT_EQ (tok->type, CPP_CHAR32);
3836   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3837
3838   /* L'a'.  */
3839   tok = test.get_token ();
3840   ASSERT_EQ (tok->type, CPP_WCHAR);
3841   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3842
3843   /* 'abc' (c-char-sequence).  */
3844   tok = test.get_token ();
3845   ASSERT_EQ (tok->type, CPP_CHAR);
3846   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3847 }
3848 /* A table of interesting location_t values, giving one axis of our test
3849    matrix.  */
3850
3851 static const location_t boundary_locations[] = {
3852   /* Zero means "don't override the default values for a new line_table".  */
3853   0,
3854
3855   /* An arbitrary non-zero value that isn't close to one of
3856      the boundary values below.  */
3857   0x10000,
3858
3859   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
3860   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3861   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3862   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3863   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3864   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3865
3866   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
3867   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3868   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3869   LINE_MAP_MAX_LOCATION_WITH_COLS,
3870   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3871   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3872 };
3873
3874 /* Run TESTCASE multiple times, once for each case in our test matrix.  */
3875
3876 void
3877 for_each_line_table_case (void (*testcase) (const line_table_case &))
3878 {
3879   /* As noted above in the description of struct line_table_case,
3880      we want to explore a test matrix of interesting line_table
3881      situations, running various selftests for each case within the
3882      matrix.  */
3883
3884   /* Run all tests with:
3885      (a) line_table->default_range_bits == 0, and
3886      (b) line_table->default_range_bits == 5.  */
3887   int num_cases_tested = 0;
3888   for (int default_range_bits = 0; default_range_bits <= 5;
3889        default_range_bits += 5)
3890     {
3891       /* ...and use each of the "interesting" location values as
3892          the starting location within line_table.  */
3893       const int num_boundary_locations = ARRAY_SIZE (boundary_locations);
3894       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3895         {
3896           line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3897
3898           testcase (c);
3899
3900           num_cases_tested++;
3901         }
3902     }
3903
3904   /* Verify that we fully covered the test matrix.  */
3905   ASSERT_EQ (num_cases_tested, 2 * 12);
3906 }
3907
3908 /* Verify that when presented with a consecutive pair of locations with
3909    a very large line offset, we don't attempt to consolidate them into
3910    a single ordinary linemap where the line offsets within the line map
3911    would lead to overflow (PR lto/88147).  */
3912
3913 static void
3914 test_line_offset_overflow ()
3915 {
3916   line_table_test ltt (line_table_case (5, 0));
3917
3918   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3919   linemap_line_start (line_table, 1, 100);
3920   location_t loc_a = linemap_line_start (line_table, 2578, 255);
3921   assert_loceq ("foo.c", 2578, 0, loc_a);
3922
3923   const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3924   ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3925   ASSERT_EQ (ordmap_a->m_range_bits, 5);
3926
3927   location_t loc_b = linemap_line_start (line_table, 404198, 512);
3928   assert_loceq ("foo.c", 404198, 0, loc_b);
3929
3930   /* We should have started a new linemap, rather than attempting to store
3931      a very large line offset.  */
3932   const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3933   ASSERT_NE (ordmap_a, ordmap_b);
3934 }
3935
3936 void test_cpp_utf8 ()
3937 {
3938   const int def_tabstop = 8;
3939   cpp_char_column_policy policy (def_tabstop, cpp_wcwidth);
3940
3941   /* Verify that wcwidth of invalid UTF-8 or control bytes is 1.  */
3942   {
3943     int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, policy);
3944     ASSERT_EQ (8, w_bad);
3945     int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, policy);
3946     ASSERT_EQ (5, w_ctrl);
3947   }
3948
3949   /* Verify that wcwidth of valid UTF-8 is as expected.  */
3950   {
3951     const int w_pi = cpp_display_width ("\xcf\x80", 2, policy);
3952     ASSERT_EQ (1, w_pi);
3953     const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, policy);
3954     ASSERT_EQ (2, w_emoji);
3955     const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
3956                                                         policy);
3957     ASSERT_EQ (1, w_umlaut_precomposed);
3958     const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
3959                                                       policy);
3960     ASSERT_EQ (1, w_umlaut_combining);
3961     const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, policy);
3962     ASSERT_EQ (2, w_han);
3963     const int w_ascii = cpp_display_width ("GCC", 3, policy);
3964     ASSERT_EQ (3, w_ascii);
3965     const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
3966                                            "\x9f! \xe4\xb8\xba y\xcc\x88",
3967                                            24, policy);
3968     ASSERT_EQ (18, w_mixed);
3969   }
3970
3971   /* Verify that display width properly expands tabs.  */
3972   {
3973     const char *tstr = "\tabc\td";
3974     ASSERT_EQ (6, cpp_display_width (tstr, 6,
3975                                      cpp_char_column_policy (1, cpp_wcwidth)));
3976     ASSERT_EQ (10, cpp_display_width (tstr, 6,
3977                                       cpp_char_column_policy (3, cpp_wcwidth)));
3978     ASSERT_EQ (17, cpp_display_width (tstr, 6,
3979                                       cpp_char_column_policy (8, cpp_wcwidth)));
3980     ASSERT_EQ (1,
3981                cpp_display_column_to_byte_column
3982                  (tstr, 6, 7, cpp_char_column_policy (8, cpp_wcwidth)));
3983   }
3984
3985   /* Verify that cpp_byte_column_to_display_column can go past the end,
3986      and similar edge cases.  */
3987   {
3988     const char *str
3989       /* Display columns.
3990          111111112345  */
3991       = "\xcf\x80 abc";
3992       /* 111122223456
3993          Byte columns.  */
3994
3995     ASSERT_EQ (5, cpp_display_width (str, 6, policy));
3996     ASSERT_EQ (105,
3997                cpp_byte_column_to_display_column (str, 6, 106, policy));
3998     ASSERT_EQ (10000,
3999                cpp_byte_column_to_display_column (NULL, 0, 10000, policy));
4000     ASSERT_EQ (0,
4001                cpp_byte_column_to_display_column (NULL, 10000, 0, policy));
4002   }
4003
4004   /* Verify that cpp_display_column_to_byte_column can go past the end,
4005      and similar edge cases, and check invertibility.  */
4006   {
4007     const char *str
4008       /* Display columns.
4009          000000000000000000000000000000000000011
4010          111111112222222234444444455555555678901  */
4011       = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
4012       /* 000000000000000000000000000000000111111
4013          111122223333444456666777788889999012345
4014          Byte columns.  */
4015     ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, policy));
4016     ASSERT_EQ (15,
4017                cpp_display_column_to_byte_column (str, 15, 11, policy));
4018     ASSERT_EQ (115,
4019                cpp_display_column_to_byte_column (str, 15, 111, policy));
4020     ASSERT_EQ (10000,
4021                cpp_display_column_to_byte_column (NULL, 0, 10000, policy));
4022     ASSERT_EQ (0,
4023                cpp_display_column_to_byte_column (NULL, 10000, 0, policy));
4024
4025     /* Verify that we do not interrupt a UTF-8 sequence.  */
4026     ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, policy));
4027
4028     for (int byte_col = 1; byte_col <= 15; ++byte_col)
4029       {
4030         const int disp_col
4031           = cpp_byte_column_to_display_column (str, 15, byte_col, policy);
4032         const int byte_col2
4033           = cpp_display_column_to_byte_column (str, 15, disp_col, policy);
4034
4035         /* If we ask for the display column in the middle of a UTF-8
4036            sequence, it will return the length of the partial sequence,
4037            matching the behavior of GCC before display column support.
4038            Otherwise check the round trip was successful.  */
4039         if (byte_col < 4)
4040           ASSERT_EQ (byte_col, disp_col);
4041         else if (byte_col >= 6 && byte_col < 9)
4042           ASSERT_EQ (3 + (byte_col - 5), disp_col);
4043         else
4044           ASSERT_EQ (byte_col2, byte_col);
4045       }
4046   }
4047 }
4048
4049 static bool
4050 check_cpp_valid_utf8_p (const char *str)
4051 {
4052   return cpp_valid_utf8_p (str, strlen (str));
4053 }
4054
4055 /* Check that cpp_valid_utf8_p works as expected.  */
4056
4057 static void
4058 test_cpp_valid_utf8_p ()
4059 {
4060   ASSERT_TRUE (check_cpp_valid_utf8_p ("hello world"));
4061
4062   /* 2-byte char (pi).  */
4063   ASSERT_TRUE (check_cpp_valid_utf8_p("\xcf\x80"));
4064
4065   /* 3-byte chars (the Japanese word "mojibake").  */
4066   ASSERT_TRUE (check_cpp_valid_utf8_p
4067                (
4068                 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
4069                    UTF-8: 0xE6 0x96 0x87
4070                    C octal escaped UTF-8: \346\226\207.  */
4071                 "\346\226\207"
4072                 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
4073                    UTF-8: 0xE5 0xAD 0x97
4074                    C octal escaped UTF-8: \345\255\227.  */
4075                 "\345\255\227"
4076                 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
4077                    UTF-8: 0xE5 0x8C 0x96
4078                    C octal escaped UTF-8: \345\214\226.  */
4079                 "\345\214\226"
4080                 /* U+3051 HIRAGANA LETTER KE
4081                    UTF-8: 0xE3 0x81 0x91
4082                    C octal escaped UTF-8: \343\201\221.  */
4083                 "\343\201\221"));
4084
4085   /* 4-byte char: an emoji.  */
4086   ASSERT_TRUE (check_cpp_valid_utf8_p ("\xf0\x9f\x98\x82"));
4087
4088   /* Control codes, including the NUL byte.  */
4089   ASSERT_TRUE (cpp_valid_utf8_p ("\r\n\v\0\1", 5));
4090
4091   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xf0!\x9f!\x98!\x82!"));
4092
4093   /* Unexpected continuation bytes.  */
4094   for (unsigned char continuation_byte = 0x80;
4095        continuation_byte <= 0xbf;
4096        continuation_byte++)
4097     ASSERT_FALSE (cpp_valid_utf8_p ((const char *)&continuation_byte, 1));
4098
4099   /* "Lonely start characters" for 2-byte sequences.  */
4100   {
4101     unsigned char buf[2];
4102     buf[1] = ' ';
4103     for (buf[0] = 0xc0;
4104          buf[0] <= 0xdf;
4105          buf[0]++)
4106       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4107   }
4108
4109   /* "Lonely start characters" for 3-byte sequences.  */
4110   {
4111     unsigned char buf[2];
4112     buf[1] = ' ';
4113     for (buf[0] = 0xe0;
4114          buf[0] <= 0xef;
4115          buf[0]++)
4116       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4117   }
4118
4119   /* "Lonely start characters" for 4-byte sequences.  */
4120   {
4121     unsigned char buf[2];
4122     buf[1] = ' ';
4123     for (buf[0] = 0xf0;
4124          buf[0] <= 0xf4;
4125          buf[0]++)
4126       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4127   }
4128
4129   /* Invalid start characters (formerly valid for 5-byte and 6-byte
4130      sequences).  */
4131   {
4132     unsigned char buf[2];
4133     buf[1] = ' ';
4134     for (buf[0] = 0xf5;
4135          buf[0] <= 0xfd;
4136          buf[0]++)
4137       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4138   }
4139
4140   /* Impossible bytes.  */
4141   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc0"));
4142   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc1"));
4143   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xfe"));
4144   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xff"));
4145 }
4146
4147 /* Run all of the selftests within this file.  */
4148
4149 void
4150 input_cc_tests ()
4151 {
4152   test_linenum_comparisons ();
4153   test_should_have_column_data_p ();
4154   test_unknown_location ();
4155   test_builtins ();
4156   for_each_line_table_case (test_make_location_nonpure_range_endpoints);
4157
4158   for_each_line_table_case (test_accessing_ordinary_linemaps);
4159   for_each_line_table_case (test_lexer);
4160   for_each_line_table_case (test_lexer_string_locations_simple);
4161   for_each_line_table_case (test_lexer_string_locations_ebcdic);
4162   for_each_line_table_case (test_lexer_string_locations_hex);
4163   for_each_line_table_case (test_lexer_string_locations_oct);
4164   for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
4165   for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
4166   for_each_line_table_case (test_lexer_string_locations_ucn4);
4167   for_each_line_table_case (test_lexer_string_locations_ucn8);
4168   for_each_line_table_case (test_lexer_string_locations_wide_string);
4169   for_each_line_table_case (test_lexer_string_locations_string16);
4170   for_each_line_table_case (test_lexer_string_locations_string32);
4171   for_each_line_table_case (test_lexer_string_locations_u8);
4172   for_each_line_table_case (test_lexer_string_locations_utf8_source);
4173   for_each_line_table_case (test_lexer_string_locations_concatenation_1);
4174   for_each_line_table_case (test_lexer_string_locations_concatenation_2);
4175   for_each_line_table_case (test_lexer_string_locations_concatenation_3);
4176   for_each_line_table_case (test_lexer_string_locations_macro);
4177   for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
4178   for_each_line_table_case (test_lexer_string_locations_non_string);
4179   for_each_line_table_case (test_lexer_string_locations_long_line);
4180   for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
4181   for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
4182   for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
4183   for_each_line_table_case (test_lexer_char_constants);
4184
4185   test_reading_source_line ();
4186
4187   test_line_offset_overflow ();
4188
4189   test_cpp_utf8 ();
4190   test_cpp_valid_utf8_p ();
4191 }
4192
4193 } // namespace selftest
4194
4195 #endif /* CHECKING_P */