gcc/input.cc

   1 /* Data and functions related to line maps and input files.
   2    Copyright (C) 2004-2023 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "intl.h"
  24 #include "diagnostic.h"
  25 #include "selftest.h"
  26 #include "cpplib.h"
  27
  28 #ifndef HAVE_ICONV
  29 #define HAVE_ICONV 0
  30 #endif
  31
  32 const char *
  33 special_fname_builtin ()
  34 {
  35   return _("<built-in>");
  36 }
  37
  38 /* Input charset configuration.  */
  39 static const char *default_charset_callback (const char *)
  40 {
  41   return nullptr;
  42 }
  43
  44 void
  45 file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
  46                                       bool should_skip_bom)
  47 {
  48   in_context.ccb = (ccb ? ccb : default_charset_callback);
  49   in_context.should_skip_bom = should_skip_bom;
  50 }
  51
  52 /* This is a cache used by get_next_line to store the content of a
  53    file to be searched for file lines.  */
  54 class file_cache_slot
  55 {
  56 public:
  57   file_cache_slot ();
  58   ~file_cache_slot ();
  59
  60   bool read_line_num (size_t line_num,
  61                       char ** line, ssize_t *line_len);
  62
  63   /* Accessors.  */
  64   const char *get_file_path () const { return m_file_path; }
  65   unsigned get_use_count () const { return m_use_count; }
  66   bool missing_trailing_newline_p () const
  67   {
  68     return m_missing_trailing_newline;
  69   }
  70   char_span get_full_file_content ();
  71
  72   void inc_use_count () { m_use_count++; }
  73
  74   bool create (const file_cache::input_context &in_context,
  75                const char *file_path, FILE *fp, unsigned highest_use_count);
  76   void evict ();
  77
  78  private:
  79   /* These are information used to store a line boundary.  */
  80   class line_info
  81   {
  82   public:
  83     /* The line number.  It starts from 1.  */
  84     size_t line_num;
  85
  86     /* The position (byte count) of the beginning of the line,
  87        relative to the file data pointer.  This starts at zero.  */
  88     size_t start_pos;
  89
  90     /* The position (byte count) of the last byte of the line.  This
  91        normally points to the '\n' character, or to one byte after the
  92        last byte of the file, if the file doesn't contain a '\n'
  93        character.  */
  94     size_t end_pos;
  95
  96     line_info (size_t l, size_t s, size_t e)
  97       : line_num (l), start_pos (s), end_pos (e)
  98     {}
  99
 100     line_info ()
 101       :line_num (0), start_pos (0), end_pos (0)
 102     {}
 103   };
 104
 105   bool needs_read_p () const;
 106   bool needs_grow_p () const;
 107   void maybe_grow ();
 108   bool read_data ();
 109   bool maybe_read_data ();
 110   bool get_next_line (char **line, ssize_t *line_len);
 111   bool read_next_line (char ** line, ssize_t *line_len);
 112   bool goto_next_line ();
 113
 114   static const size_t buffer_size = 4 * 1024;
 115   static const size_t line_record_size = 100;
 116
 117   /* The number of time this file has been accessed.  This is used
 118      to designate which file cache to evict from the cache
 119      array.  */
 120   unsigned m_use_count;
 121
 122   /* The file_path is the key for identifying a particular file in
 123      the cache.
 124      For libcpp-using code, the underlying buffer for this field is
 125      owned by the corresponding _cpp_file within the cpp_reader.  */
 126   const char *m_file_path;
 127
 128   FILE *m_fp;
 129
 130   /* This points to the content of the file that we've read so
 131      far.  */
 132   char *m_data;
 133
 134   /* The allocated buffer to be freed may start a little earlier than DATA,
 135      e.g. if a UTF8 BOM was skipped at the beginning.  */
 136   int m_alloc_offset;
 137
 138   /*  The size of the DATA array above.*/
 139   size_t m_size;
 140
 141   /* The number of bytes read from the underlying file so far.  This
 142      must be less (or equal) than SIZE above.  */
 143   size_t m_nb_read;
 144
 145   /* The index of the beginning of the current line.  */
 146   size_t m_line_start_idx;
 147
 148   /* The number of the previous line read.  This starts at 1.  Zero
 149      means we've read no line so far.  */
 150   size_t m_line_num;
 151
 152   /* This is the total number of lines of the current file.  At the
 153      moment, we try to get this information from the line map
 154      subsystem.  Note that this is just a hint.  When using the C++
 155      front-end, this hint is correct because the input file is then
 156      completely tokenized before parsing starts; so the line map knows
 157      the number of lines before compilation really starts.  For e.g,
 158      the C front-end, it can happen that we start emitting diagnostics
 159      before the line map has seen the end of the file.  */
 160   size_t m_total_lines;
 161
 162   /* Could this file be missing a trailing newline on its final line?
 163      Initially true (to cope with empty files), set to true/false
 164      as each line is read.  */
 165   bool m_missing_trailing_newline;
 166
 167   /* This is a record of the beginning and end of the lines we've seen
 168      while reading the file.  This is useful to avoid walking the data
 169      from the beginning when we are asked to read a line that is
 170      before LINE_START_IDX above.  Note that the maximum size of this
 171      record is line_record_size, so that the memory consumption
 172      doesn't explode.  We thus scale total_lines down to
 173      line_record_size.  */
 174   vec<line_info, va_heap> m_line_record;
 175
 176   void offset_buffer (int offset)
 177   {
 178     gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
 179                 : (size_t) offset <= m_size);
 180     gcc_assert (m_data);
 181     m_alloc_offset += offset;
 182     m_data += offset;
 183     m_size -= offset;
 184   }
 185
 186 };
 187
/* Current position in real source file.  Starts out as
   UNKNOWN_LOCATION.  */

location_t input_location = UNKNOWN_LOCATION;

/* The line-map table handed to the linemap_* routines called in this
   file when locations are resolved and expanded.  */

class line_maps *line_table;

/* A stashed copy of "line_table" for use by selftest::line_table_test.
   This needs to be a global so that it can be a GC root, and thus
   prevent the stashed copy from being garbage-collected if the GC runs
   during a line_table_test.  */

class line_maps *saved_line_table;
 200
 201 /* Expand the source location LOC into a human readable location.  If
 202    LOC resolves to a builtin location, the file name of the readable
 203    location is set to the string "<built-in>". If EXPANSION_POINT_P is
 204    TRUE and LOC is virtual, then it is resolved to the expansion
 205    point of the involved macro.  Otherwise, it is resolved to the
 206    spelling location of the token.
 207
 208    When resolving to the spelling location of the token, if the
 209    resulting location is for a built-in location (that is, it has no
 210    associated line/column) in the context of a macro expansion, the
 211    returned location is the first one (while unwinding the macro
 212    location towards its expansion point) that is in real source
 213    code.
 214
 215    ASPECT controls which part of the location to use.  */
 216
 217 static expanded_location
 218 expand_location_1 (location_t loc,
 219                    bool expansion_point_p,
 220                    enum location_aspect aspect)
 221 {
 222   expanded_location xloc;
 223   const line_map_ordinary *map;
 224   enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
 225   tree block = NULL;
 226
 227   if (IS_ADHOC_LOC (loc))
 228     {
 229       block = LOCATION_BLOCK (loc);
 230       loc = LOCATION_LOCUS (loc);
 231     }
 232
 233   memset (&xloc, 0, sizeof (xloc));
 234
 235   if (loc >= RESERVED_LOCATION_COUNT)
 236     {
 237       if (!expansion_point_p)
 238         {
 239           /* We want to resolve LOC to its spelling location.
 240
 241              But if that spelling location is a reserved location that
 242              appears in the context of a macro expansion (like for a
 243              location for a built-in token), let's consider the first
 244              location (toward the expansion point) that is not reserved;
 245              that is, the first location that is in real source code.  */
 246           loc = linemap_unwind_to_first_non_reserved_loc (line_table,
 247                                                           loc, NULL);
 248           lrk = LRK_SPELLING_LOCATION;
 249         }
 250       loc = linemap_resolve_location (line_table, loc, lrk, &map);
 251
 252       /* loc is now either in an ordinary map, or is a reserved location.
 253          If it is a compound location, the caret is in a spelling location,
 254          but the start/finish might still be a virtual location.
 255          Depending of what the caller asked for, we may need to recurse
 256          one level in order to resolve any virtual locations in the
 257          end-points.  */
 258       switch (aspect)
 259         {
 260         default:
 261           gcc_unreachable ();
 262           /* Fall through.  */
 263         case LOCATION_ASPECT_CARET:
 264           break;
 265         case LOCATION_ASPECT_START:
 266           {
 267             location_t start = get_start (loc);
 268             if (start != loc)
 269               return expand_location_1 (start, expansion_point_p, aspect);
 270           }
 271           break;
 272         case LOCATION_ASPECT_FINISH:
 273           {
 274             location_t finish = get_finish (loc);
 275             if (finish != loc)
 276               return expand_location_1 (finish, expansion_point_p, aspect);
 277           }
 278           break;
 279         }
 280       xloc = linemap_expand_location (line_table, map, loc);
 281     }
 282
 283   xloc.data = block;
 284   if (loc <= BUILTINS_LOCATION)
 285     xloc.file = loc == UNKNOWN_LOCATION ? NULL : special_fname_builtin ();
 286
 287   return xloc;
 288 }
 289
 290 /* Initialize the set of cache used for files accessed by caret
 291    diagnostic.  */
 292
 293 static void
 294 diagnostic_file_cache_init (void)
 295 {
 296   gcc_assert (global_dc);
 297   if (global_dc->m_file_cache == NULL)
 298     global_dc->m_file_cache = new file_cache ();
 299 }
 300
 301 /* Free the resources used by the set of cache used for files accessed
 302    by caret diagnostic.  */
 303
 304 void
 305 diagnostic_file_cache_fini (void)
 306 {
 307   if (global_dc->m_file_cache)
 308     {
 309       delete global_dc->m_file_cache;
 310       global_dc->m_file_cache = NULL;
 311     }
 312 }
 313
 314 /* Return the total lines number that have been read so far by the
 315    line map (in the preprocessor) so far.  For languages like C++ that
 316    entirely preprocess the input file before starting to parse, this
 317    equals the actual number of lines of the file.  */
 318
 319 static size_t
 320 total_lines_num (const char *file_path)
 321 {
 322   size_t r = 0;
 323   location_t l = 0;
 324   if (linemap_get_file_highest_location (line_table, file_path, &l))
 325     {
 326       gcc_assert (l >= RESERVED_LOCATION_COUNT);
 327       expanded_location xloc = expand_location (l);
 328       r = xloc.line;
 329     }
 330   return r;
 331 }
 332
 333 /* Lookup the cache used for the content of a given file accessed by
 334    caret diagnostic.  Return the found cached file, or NULL if no
 335    cached file was found.  */
 336
 337 file_cache_slot *
 338 file_cache::lookup_file (const char *file_path)
 339 {
 340   gcc_assert (file_path);
 341
 342   /* This will contain the found cached file.  */
 343   file_cache_slot *r = NULL;
 344   for (unsigned i = 0; i < num_file_slots; ++i)
 345     {
 346       file_cache_slot *c = &m_file_slots[i];
 347       if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
 348         {
 349           c->inc_use_count ();
 350           r = c;
 351         }
 352     }
 353
 354   if (r)
 355     r->inc_use_count ();
 356
 357   return r;
 358 }
 359
 360 /* Purge any mention of FILENAME from the cache of files used for
 361    printing source code.  For use in selftests when working
 362    with tempfiles.  */
 363
 364 void
 365 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
 366 {
 367   gcc_assert (file_path);
 368
 369   if (!global_dc->m_file_cache)
 370     return;
 371
 372   global_dc->m_file_cache->forcibly_evict_file (file_path);
 373 }
 374
 375 void
 376 file_cache::forcibly_evict_file (const char *file_path)
 377 {
 378   gcc_assert (file_path);
 379
 380   file_cache_slot *r = lookup_file (file_path);
 381   if (!r)
 382     /* Not found.  */
 383     return;
 384
 385   r->evict ();
 386 }
 387
 388 void
 389 file_cache_slot::evict ()
 390 {
 391   m_file_path = NULL;
 392   if (m_fp)
 393     fclose (m_fp);
 394   m_fp = NULL;
 395   m_nb_read = 0;
 396   m_line_start_idx = 0;
 397   m_line_num = 0;
 398   m_line_record.truncate (0);
 399   m_use_count = 0;
 400   m_total_lines = 0;
 401   m_missing_trailing_newline = true;
 402 }
 403
 404 /* Return the file cache that has been less used, recently, or the
 405    first empty one.  If HIGHEST_USE_COUNT is non-null,
 406    *HIGHEST_USE_COUNT is set to the highest use count of the entries
 407    in the cache table.  */
 408
 409 file_cache_slot*
 410 file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
 411 {
 412   diagnostic_file_cache_init ();
 413
 414   file_cache_slot *to_evict = &m_file_slots[0];
 415   unsigned huc = to_evict->get_use_count ();
 416   for (unsigned i = 1; i < num_file_slots; ++i)
 417     {
 418       file_cache_slot *c = &m_file_slots[i];
 419       bool c_is_empty = (c->get_file_path () == NULL);
 420
 421       if (c->get_use_count () < to_evict->get_use_count ()
 422           || (to_evict->get_file_path () && c_is_empty))
 423         /* We evict C because it's either an entry with a lower use
 424            count or one that is empty.  */
 425         to_evict = c;
 426
 427       if (huc < c->get_use_count ())
 428         huc = c->get_use_count ();
 429
 430       if (c_is_empty)
 431         /* We've reached the end of the cache; subsequent elements are
 432            all empty.  */
 433         break;
 434     }
 435
 436   if (highest_use_count)
 437     *highest_use_count = huc;
 438
 439   return to_evict;
 440 }
 441
 442 /* Create the cache used for the content of a given file to be
 443    accessed by caret diagnostic.  This cache is added to an array of
 444    cache and can be retrieved by lookup_file_in_cache_tab.  This
 445    function returns the created cache.  Note that only the last
 446    num_file_slots files are cached.  */
 447
 448 file_cache_slot*
 449 file_cache::add_file (const char *file_path)
 450 {
 451
 452   FILE *fp = fopen (file_path, "r");
 453   if (fp == NULL)
 454     return NULL;
 455
 456   unsigned highest_use_count = 0;
 457   file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
 458   if (!r->create (in_context, file_path, fp, highest_use_count))
 459     return NULL;
 460   return r;
 461 }
 462
 463 /* Get a borrowed char_span to the full content of this file
 464    as decoded according to the input charset, encoded as UTF-8.  */
 465
 466 char_span
 467 file_cache_slot::get_full_file_content ()
 468 {
 469   char *line;
 470   ssize_t line_len;
 471   while (get_next_line (&line, &line_len))
 472     {
 473     }
 474   return char_span (m_data, m_nb_read);
 475 }
 476
 477 /* Populate this slot for use on FILE_PATH and FP, dropping any
 478    existing cached content within it.  */
 479
 480 bool
 481 file_cache_slot::create (const file_cache::input_context &in_context,
 482                          const char *file_path, FILE *fp,
 483                          unsigned highest_use_count)
 484 {
 485   m_file_path = file_path;
 486   if (m_fp)
 487     fclose (m_fp);
 488   m_fp = fp;
 489   if (m_alloc_offset)
 490     offset_buffer (-m_alloc_offset);
 491   m_nb_read = 0;
 492   m_line_start_idx = 0;
 493   m_line_num = 0;
 494   m_line_record.truncate (0);
 495   /* Ensure that this cache entry doesn't get evicted next time
 496      add_file_to_cache_tab is called.  */
 497   m_use_count = ++highest_use_count;
 498   m_total_lines = total_lines_num (file_path);
 499   m_missing_trailing_newline = true;
 500
 501
 502   /* Check the input configuration to determine if we need to do any
 503      transformations, such as charset conversion or BOM skipping.  */
 504   if (const char *input_charset = in_context.ccb (file_path))
 505     {
 506       /* Need a full-blown conversion of the input charset.  */
 507       fclose (m_fp);
 508       m_fp = NULL;
 509       const cpp_converted_source cs
 510         = cpp_get_converted_source (file_path, input_charset);
 511       if (!cs.data)
 512         return false;
 513       if (m_data)
 514         XDELETEVEC (m_data);
 515       m_data = cs.data;
 516       m_nb_read = m_size = cs.len;
 517       m_alloc_offset = cs.data - cs.to_free;
 518     }
 519   else if (in_context.should_skip_bom)
 520     {
 521       if (read_data ())
 522         {
 523           const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
 524           offset_buffer (offset);
 525           m_nb_read -= offset;
 526         }
 527     }
 528
 529   return true;
 530 }
 531
 532 /* file_cache's ctor.  */
 533
 534 file_cache::file_cache ()
 535 : m_file_slots (new file_cache_slot[num_file_slots])
 536 {
 537   initialize_input_context (nullptr, false);
 538 }
 539
 540 /* file_cache's dtor.  */
 541
 542 file_cache::~file_cache ()
 543 {
 544   delete[] m_file_slots;
 545 }
 546
 547 /* Lookup the cache used for the content of a given file accessed by
 548    caret diagnostic.  If no cached file was found, create a new cache
 549    for this file, add it to the array of cached file and return
 550    it.  */
 551
 552 file_cache_slot*
 553 file_cache::lookup_or_add_file (const char *file_path)
 554 {
 555   file_cache_slot *r = lookup_file (file_path);
 556   if (r == NULL)
 557     r = add_file (file_path);
 558   return r;
 559 }
 560
 561 /* Default constructor for a cache of file used by caret
 562    diagnostic.  */
 563
 564 file_cache_slot::file_cache_slot ()
 565 : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
 566   m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
 567   m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true)
 568 {
 569   m_line_record.create (0);
 570 }
 571
 572 /* Destructor for a cache of file used by caret diagnostic.  */
 573
 574 file_cache_slot::~file_cache_slot ()
 575 {
 576   if (m_fp)
 577     {
 578       fclose (m_fp);
 579       m_fp = NULL;
 580     }
 581   if (m_data)
 582     {
 583       offset_buffer (-m_alloc_offset);
 584       XDELETEVEC (m_data);
 585       m_data = 0;
 586     }
 587   m_line_record.release ();
 588 }
 589
 590 /* Returns TRUE iff the cache would need to be filled with data coming
 591    from the file.  That is, either the cache is empty or full or the
 592    current line is empty.  Note that if the cache is full, it would
 593    need to be extended and filled again.  */
 594
 595 bool
 596 file_cache_slot::needs_read_p () const
 597 {
 598   return m_fp && (m_nb_read == 0
 599           || m_nb_read == m_size
 600           || (m_line_start_idx >= m_nb_read - 1));
 601 }
 602
 603 /*  Return TRUE iff the cache is full and thus needs to be
 604     extended.  */
 605
 606 bool
 607 file_cache_slot::needs_grow_p () const
 608 {
 609   return m_nb_read == m_size;
 610 }
 611
 612 /* Grow the cache if it needs to be extended.  */
 613
 614 void
 615 file_cache_slot::maybe_grow ()
 616 {
 617   if (!needs_grow_p ())
 618     return;
 619
 620   if (!m_data)
 621     {
 622       gcc_assert (m_size == 0 && m_alloc_offset == 0);
 623       m_size = buffer_size;
 624       m_data = XNEWVEC (char, m_size);
 625     }
 626   else
 627     {
 628       const int offset = m_alloc_offset;
 629       offset_buffer (-offset);
 630       m_size *= 2;
 631       m_data = XRESIZEVEC (char, m_data, m_size);
 632       offset_buffer (offset);
 633     }
 634 }
 635
 636 /*  Read more data into the cache.  Extends the cache if need be.
 637     Returns TRUE iff new data could be read.  */
 638
 639 bool
 640 file_cache_slot::read_data ()
 641 {
 642   if (feof (m_fp) || ferror (m_fp))
 643     return false;
 644
 645   maybe_grow ();
 646
 647   char * from = m_data + m_nb_read;
 648   size_t to_read = m_size - m_nb_read;
 649   size_t nb_read = fread (from, 1, to_read, m_fp);
 650
 651   if (ferror (m_fp))
 652     return false;
 653
 654   m_nb_read += nb_read;
 655   return !!nb_read;
 656 }
 657
 658 /* Read new data iff the cache needs to be filled with more data
 659    coming from the file FP.  Return TRUE iff the cache was filled with
 660    mode data.  */
 661
 662 bool
 663 file_cache_slot::maybe_read_data ()
 664 {
 665   if (!needs_read_p ())
 666     return false;
 667   return read_data ();
 668 }
 669
 670 /* Helper function for file_cache_slot::get_next_line (), to find the end of
 671    the next line.  Returns with the memchr convention, i.e. nullptr if a line
 672    terminator was not found.  We need to determine line endings in the same
 673    manner that libcpp does: any of \n, \r\n, or \r is a line ending.  */
 674
 675 static char *
 676 find_end_of_line (char *s, size_t len)
 677 {
 678   for (const auto end = s + len; s != end; ++s)
 679     {
 680       if (*s == '\n')
 681         return s;
 682       if (*s == '\r')
 683         {
 684           const auto next = s + 1;
 685           if (next == end)
 686             {
 687               /* Don't find the line ending if \r is the very last character
 688                  in the buffer; we do not know if it's the end of the file or
 689                  just the end of what has been read so far, and we wouldn't
 690                  want to break in the middle of what's actually a \r\n
 691                  sequence.  Instead, we will handle the case of a file ending
 692                  in a \r later.  */
 693               break;
 694             }
 695           return (*next == '\n' ? next : s);
 696         }
 697     }
 698   return nullptr;
 699 }
 700
/* Read the next line of the file cached in this slot, reading more
   data from the underlying stream when the cached data does not yet
   contain a complete line.  Upon successful completion, *LINE is set
   to the beginning of the line found.  *LINE points directly into
   the slot's data buffer and is only valid until the next call of
   get_next_line, which may make the content of *LINE invalid.
   *LINE_LEN is set to the length of the line, which does not include
   any line terminator.  m_line_num, m_line_start_idx,
   m_missing_trailing_newline and possibly m_line_record are updated.
   This function returns true if a line was produced, and false if
   there is no unprocessed data left or if the stream is in an error
   state.  */

bool
file_cache_slot::get_next_line (char **line, ssize_t *line_len)
{
  /* Fill the cache with data to process.  */
  maybe_read_data ();

  size_t remaining_size = m_nb_read - m_line_start_idx;
  if (remaining_size == 0)
    /* There is no more data to process.  */
    return false;

  char *line_start = m_data + m_line_start_idx;

  char *next_line_start = NULL;
  size_t len = 0;
  char *line_end = find_end_of_line (line_start, remaining_size);
  if (line_end == NULL)
    {
      /* We haven't found an end-of-line delimiter in the cache.
         Fill the cache with more data from the file and look again.
         LINE_START is recomputed on every iteration because reading
         may have reallocated the buffer.  */
      while (maybe_read_data ())
        {
          line_start = m_data + m_line_start_idx;
          remaining_size = m_nb_read - m_line_start_idx;
          line_end = find_end_of_line (line_start, remaining_size);
          if (line_end != NULL)
            {
              next_line_start = line_end + 1;
              break;
            }
        }
      if (line_end == NULL)
        {
          /* We've loaded all the file into the cache and still no
             terminator.  Let's say the line ends up at one byte past the
             end of the file.  This is to stay consistent with the case
             of when the line ends up with a terminator and line_end points to
             that.  That consistency is useful below in the len calculation.

             If the file ends in a \r, we didn't identify it as a line
             terminator above, so do that now instead.  */
          line_end = m_data + m_nb_read;
          if (m_nb_read && line_end[-1] == '\r')
            {
              --line_end;
              m_missing_trailing_newline = false;
            }
          else
            m_missing_trailing_newline = true;
        }
      else
        m_missing_trailing_newline = false;
    }
  else
    {
      next_line_start = line_end + 1;
      m_missing_trailing_newline = false;
    }

  /* Give up, without counting the line, if the stream has its error
     indicator set.  */
  if (m_fp && ferror (m_fp))
    return false;

  /* At this point, we've found the end of the line.  It either points to
     the line terminator or to one byte after the last byte of the file.  */
  gcc_assert (line_end != NULL);

  len = line_end - line_start;

  if (m_line_start_idx < m_nb_read)
    *line = line_start;

  ++m_line_num;

  /* Before we update our line record, make sure the hint about the
     total number of lines of the file is correct.  If it's not, then
     we give up recording line boundaries from now on.  */
  bool update_line_record = true;
  if (m_line_num > m_total_lines)
    update_line_record = false;

  /* Now update our line record so that re-reading lines located
     before m_line_start_idx is faster.  */
  if (update_line_record
      && m_line_record.length () < line_record_size)
    {
      /* If the lines of the file fit in the line record, we just
         record all of them ...  */
      if (m_total_lines <= line_record_size
          && m_line_num > m_line_record.length ())
        m_line_record.safe_push
          (file_cache_slot::line_info (m_line_num,
                                       m_line_start_idx,
                                       line_end - m_data));
      else if (m_total_lines > line_record_size)
        {
          /* ... otherwise, we just scale total_lines down to
             line_record_size lines: N is the record index this line
             maps to, and the line is recorded only if that index is
             not occupied yet.  */
          size_t n = (m_line_num * line_record_size) / m_total_lines;
          if (m_line_record.length () == 0
              || n >= m_line_record.length ())
            m_line_record.safe_push
              (file_cache_slot::line_info (m_line_num,
                                           m_line_start_idx,
                                           line_end - m_data));
        }
    }

  /* Update m_line_start_idx so that it points to the next line to be
     read.  */
  if (next_line_start)
    m_line_start_idx = next_line_start - m_data;
  else
    /* We didn't find any line terminator.  Let's consider that the end
       of line is the end of the data in the cache.  The next
       invocation of get_next_line will either read more data from the
       underlying file or return false early because we've reached the
       end of the file.  */
    m_line_start_idx = m_nb_read;

  *line_len = len;

  return true;
}
 834
 835 /* Consume the next bytes coming from the cache (or from its
 836    underlying file if there are remaining unread bytes in the file)
 837    until we reach the next end-of-line (or end-of-file).  There is no
 838    copying from the cache involved.  Return TRUE upon successful
 839    completion.  */
 840
 841 bool
 842 file_cache_slot::goto_next_line ()
 843 {
 844   char *l;
 845   ssize_t len;
 846
 847   return get_next_line (&l, &len);
 848 }
 849
/* Read line number LINE_NUM (which must be greater than zero) from
   the file cached in this slot.  If the line was read successfully,
   *LINE points to the beginning of the line in the file cache and
   *LINE_LEN is the length of the line.  *LINE is not nul-terminated,
   but may contain zero bytes.  *LINE is only valid until the next
   call of read_line_num.  This function returns true iff a line was
   read.  */

bool
file_cache_slot::read_line_num (size_t line_num,
                       char ** line, ssize_t *line_len)
{
  gcc_assert (line_num > 0);

  if (line_num <= m_line_num)
    {
      /* We've been asked to read lines that are before m_line_num.
         So lets use our line record (if it's not empty) to try to
         avoid re-reading the file from the beginning again.  */

      if (m_line_record.is_empty ())
        {
          /* Nothing recorded: restart from the first line.  */
          m_line_start_idx = 0;
          m_line_num = 0;
        }
      else
        {
          file_cache_slot::line_info *i = NULL;
          if (m_total_lines <= line_record_size)
            {
              /* In languages where the input file is not totally
                 preprocessed up front, the m_total_lines hint
                 can be smaller than the number of lines of the
                 file.  In that case, only the first
                 m_total_lines have been recorded.

                 Otherwise, the first m_total_lines we've read have
                 their start/end recorded here.  */
              i = (line_num <= m_total_lines)
                ? &m_line_record[line_num - 1]
                : &m_line_record[m_total_lines - 1];
              gcc_assert (i->line_num <= line_num);
            }
          else
            {
              /*  So the file had more lines than our line record
                  size.  Thus the number of lines we've recorded has
                  been scaled down to line_record_size.  Let's
                  pick the start/end of the recorded line that is
                  closest to line_num.  */
              size_t n = (line_num <= m_total_lines)
                ? line_num * line_record_size / m_total_lines
                : m_line_record.length () - 1;
              if (n < m_line_record.length ())
                {
                  i = &m_line_record[n];
                  gcc_assert (i->line_num <= line_num);
                }
            }

          if (i && i->line_num == line_num)
            {
              /* We have the start/end of the line.  */
              *line = m_data + i->start_pos;
              *line_len = i->end_pos - i->start_pos;
              return true;
            }

          if (i)
            {
              /* Resume scanning from the recorded line, which is at
                 or before the requested one.  */
              m_line_start_idx = i->start_pos;
              m_line_num = i->line_num - 1;
            }
          else
            {
              /* No usable record entry: restart from the first
                 line.  */
              m_line_start_idx = 0;
              m_line_num = 0;
            }
        }
    }

  /*  Let's walk from line m_line_num up to line_num - 1, without
      copying any line.  */
  while (m_line_num < line_num - 1)
    if (!goto_next_line ())
      return false;

  /* The line we want is the next one.  Let's read it and hand it back
     to the caller.  */
  return get_next_line (line, line_len);
}
 940
 941 /* Return the physical source line that corresponds to FILE_PATH/LINE.
 942    The line is not nul-terminated.  The returned pointer is only
 943    valid until the next call of location_get_source_line.
 944    Note that the line can contain several null characters,
 945    so the returned value's length has the actual length of the line.
 946    If the function fails, a NULL char_span is returned.  */
 947
 948 char_span
 949 location_get_source_line (const char *file_path, int line)
 950 {
 951   char *buffer = NULL;
 952   ssize_t len;
 953
 954   if (line == 0)
 955     return char_span (NULL, 0);
 956
 957   if (file_path == NULL)
 958     return char_span (NULL, 0);
 959
 960   diagnostic_file_cache_init ();
 961
 962   file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
 963   if (c == NULL)
 964     return char_span (NULL, 0);
 965
 966   bool read = c->read_line_num (line, &buffer, &len);
 967   if (!read)
 968     return char_span (NULL, 0);
 969
 970   return char_span (buffer, len);
 971 }
 972
 973 /* Return a NUL-terminated copy of the source text between two locations, or
 974    NULL if the arguments are invalid.  The caller is responsible for freeing
 975    the return value.  */
 976
 977 char *
 978 get_source_text_between (location_t start, location_t end)
 979 {
 980   expanded_location expstart =
 981     expand_location_to_spelling_point (start, LOCATION_ASPECT_START);
 982   expanded_location expend =
 983     expand_location_to_spelling_point (end, LOCATION_ASPECT_FINISH);
 984
 985   /* If the locations are in different files or the end comes before the
 986      start, give up and return nothing.  */
 987   if (!expstart.file || !expend.file)
 988     return NULL;
 989   if (strcmp (expstart.file, expend.file) != 0)
 990     return NULL;
 991   if (expstart.line > expend.line)
 992     return NULL;
 993   if (expstart.line == expend.line
 994       && expstart.column > expend.column)
 995     return NULL;
 996   /* These aren't real column numbers, give up.  */
 997   if (expstart.column == 0 || expend.column == 0)
 998     return NULL;
 999
1000   /* For a single line we need to trim both edges.  */
1001   if (expstart.line == expend.line)
1002     {
1003       char_span line = location_get_source_line (expstart.file, expstart.line);
1004       if (line.length () < 1)
1005         return NULL;
1006       int s = expstart.column - 1;
1007       int len = expend.column - s;
1008       if (line.length () < (size_t)expend.column)
1009         return NULL;
1010       return line.subspan (s, len).xstrdup ();
1011     }
1012
1013   struct obstack buf_obstack;
1014   obstack_init (&buf_obstack);
1015
1016   /* Loop through all lines in the range and append each to buf; may trim
1017      parts of the start and end lines off depending on column values.  */
1018   for (int lnum = expstart.line; lnum <= expend.line; ++lnum)
1019     {
1020       char_span line = location_get_source_line (expstart.file, lnum);
1021       if (line.length () < 1 && (lnum != expstart.line && lnum != expend.line))
1022         continue;
1023
1024       /* For the first line in the range, only start at expstart.column */
1025       if (lnum == expstart.line)
1026         {
1027           unsigned off = expstart.column - 1;
1028           if (line.length () < off)
1029             return NULL;
1030           line = line.subspan (off, line.length() - off);
1031         }
1032       /* For the last line, don't go past expend.column */
1033       else if (lnum == expend.line)
1034         {
1035           if (line.length () < (size_t)expend.column)
1036             return NULL;
1037           line = line.subspan (0, expend.column);
1038         }
1039
1040       /* Combine spaces at the beginning of later lines.  */
1041       if (lnum > expstart.line)
1042         {
1043           unsigned off;
1044           for (off = 0; off < line.length(); ++off)
1045             if (line[off] != ' ' && line[off] != '\t')
1046               break;
1047           if (off > 0)
1048             {
1049               obstack_1grow (&buf_obstack, ' ');
1050               line = line.subspan (off, line.length() - off);
1051             }
1052         }
1053
1054       /* This does not include any trailing newlines.  */
1055       obstack_grow (&buf_obstack, line.get_buffer (), line.length ());
1056     }
1057
1058   /* NUL-terminate and finish the buf obstack.  */
1059   obstack_1grow (&buf_obstack, 0);
1060   const char *buf = (const char *) obstack_finish (&buf_obstack);
1061
1062   return xstrdup (buf);
1063 }
1064
1065 /* Get a borrowed char_span to the full content of FILE_PATH
1066    as decoded according to the input charset, encoded as UTF-8.  */
1067
1068 char_span
1069 get_source_file_content (const char *file_path)
1070 {
1071   diagnostic_file_cache_init ();
1072
1073   file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
1074   return c->get_full_file_content ();
1075 }
1076
1077 /* Determine if FILE_PATH missing a trailing newline on its final line.
1078    Only valid to call once all of the file has been loaded, by
1079    requesting a line number beyond the end of the file.  */
1080
1081 bool
1082 location_missing_trailing_newline (const char *file_path)
1083 {
1084   diagnostic_file_cache_init ();
1085
1086   file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
1087   if (c == NULL)
1088     return false;
1089
1090   return c->missing_trailing_newline_p ();
1091 }
1092
1093 /* Test if the location originates from the spelling location of a
1094    builtin-tokens.  That is, return TRUE if LOC is a (possibly
1095    virtual) location of a built-in token that appears in the expansion
1096    list of a macro.  Please note that this function also works on
1097    tokens that result from built-in tokens.  For instance, the
1098    function would return true if passed a token "4" that is the result
1099    of the expansion of the built-in __LINE__ macro.  */
1100 bool
1101 is_location_from_builtin_token (location_t loc)
1102 {
1103   const line_map_ordinary *map = NULL;
1104   loc = linemap_resolve_location (line_table, loc,
1105                                   LRK_SPELLING_LOCATION, &map);
1106   return loc == BUILTINS_LOCATION;
1107 }
1108
1109 /* Expand the source location LOC into a human readable location.  If
1110    LOC is virtual, it resolves to the expansion point of the involved
1111    macro.  If LOC resolves to a builtin location, the file name of the
1112    readable location is set to the string "<built-in>".  */
1113
1114 expanded_location
1115 expand_location (location_t loc)
1116 {
1117   return expand_location_1 (loc, /*expansion_point_p=*/true,
1118                             LOCATION_ASPECT_CARET);
1119 }
1120
1121 /* Expand the source location LOC into a human readable location.  If
1122    LOC is virtual, it resolves to the expansion location of the
1123    relevant macro.  If LOC resolves to a builtin location, the file
1124    name of the readable location is set to the string
1125    "<built-in>".  */
1126
1127 expanded_location
1128 expand_location_to_spelling_point (location_t loc,
1129                                    enum location_aspect aspect)
1130 {
1131   return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
1132 }
1133
1134 /* The rich_location class within libcpp requires a way to expand
1135    location_t instances, and relies on the client code
1136    providing a symbol named
1137      linemap_client_expand_location_to_spelling_point
1138    to do this.
1139
1140    This is the implementation for libcommon.a (all host binaries),
1141    which simply calls into expand_location_1.  */
1142
1143 expanded_location
1144 linemap_client_expand_location_to_spelling_point (location_t loc,
1145                                                   enum location_aspect aspect)
1146 {
1147   return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
1148 }
1149
1150
1151 /* If LOCATION is in a system header and if it is a virtual location
1152    for a token coming from the expansion of a macro, unwind it to
1153    the location of the expansion point of the macro.  If the expansion
1154    point is also in a system header return the original LOCATION.
1155    Otherwise, return the location of the expansion point.
1156
1157    This is used for instance when we want to emit diagnostics about a
1158    token that may be located in a macro that is itself defined in a
1159    system header, for example, for the NULL macro.  In such a case, if
1160    LOCATION were passed directly to diagnostic functions such as
1161    warning_at, the diagnostic would be suppressed (unless
1162    -Wsystem-headers).  */
1163
1164 location_t
1165 expansion_point_location_if_in_system_header (location_t location)
1166 {
1167   if (!in_system_header_at (location))
1168     return location;
1169
1170   location_t xloc = linemap_resolve_location (line_table, location,
1171                                               LRK_MACRO_EXPANSION_POINT,
1172                                               NULL);
1173   return in_system_header_at (xloc) ? location : xloc;
1174 }
1175
1176 /* If LOCATION is a virtual location for a token coming from the expansion
1177    of a macro, unwind to the location of the expansion point of the macro.  */
1178
1179 location_t
1180 expansion_point_location (location_t location)
1181 {
1182   return linemap_resolve_location (line_table, location,
1183                                    LRK_MACRO_EXPANSION_POINT, NULL);
1184 }
1185
1186 /* Construct a location with caret at CARET, ranging from START to
1187    finish e.g.
1188
1189                  11111111112
1190         12345678901234567890
1191      522
1192      523   return foo + bar;
1193                   ~~~~^~~~~
1194      524
1195
1196    The location's caret is at the "+", line 523 column 15, but starts
1197    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
1198    of "bar" at column 19.  */
1199
1200 location_t
1201 make_location (location_t caret, location_t start, location_t finish)
1202 {
1203   location_t pure_loc = get_pure_location (caret);
1204   source_range src_range;
1205   src_range.m_start = get_start (start);
1206   src_range.m_finish = get_finish (finish);
1207   location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
1208                                                    pure_loc,
1209                                                    src_range,
1210                                                    NULL,
1211                                                    0);
1212   return combined_loc;
1213 }
1214
1215 /* Same as above, but taking a source range rather than two locations.  */
1216
1217 location_t
1218 make_location (location_t caret, source_range src_range)
1219 {
1220   location_t pure_loc = get_pure_location (caret);
1221   return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL, 0);
1222 }
1223
1224 /* An expanded_location stores the column in byte units.  This function
1225    converts that column to display units.  That requires reading the associated
1226    source line in order to calculate the display width.  If that cannot be done
1227    for any reason, then returns the byte column as a fallback.  */
1228 int
1229 location_compute_display_column (expanded_location exploc,
1230                                  const cpp_char_column_policy &policy)
1231 {
1232   if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
1233     return exploc.column;
1234   char_span line = location_get_source_line (exploc.file, exploc.line);
1235   /* If line is NULL, this function returns exploc.column which is the
1236      desired fallback.  */
1237   return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
1238                                             exploc.column, policy);
1239 }
1240
1241 /* Dump statistics to stderr about the memory usage of the line_table
1242    set of line maps.  This also displays some statistics about macro
1243    expansion.  */
1244
1245 void
1246 dump_line_table_statistics (void)
1247 {
1248   struct linemap_stats s;
1249   long total_used_map_size,
1250     macro_maps_size,
1251     total_allocated_map_size;
1252
1253   memset (&s, 0, sizeof (s));
1254
1255   linemap_get_statistics (line_table, &s);
1256
1257   macro_maps_size = s.macro_maps_used_size
1258     + s.macro_maps_locations_size;
1259
1260   total_allocated_map_size = s.ordinary_maps_allocated_size
1261     + s.macro_maps_allocated_size
1262     + s.macro_maps_locations_size;
1263
1264   total_used_map_size = s.ordinary_maps_used_size
1265     + s.macro_maps_used_size
1266     + s.macro_maps_locations_size;
1267
1268   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
1269            s.num_expanded_macros);
1270   if (s.num_expanded_macros != 0)
1271     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
1272              s.num_macro_tokens / s.num_expanded_macros);
1273   fprintf (stderr,
1274            "\nLine Table allocations during the "
1275            "compilation process\n");
1276   fprintf (stderr, "Number of ordinary maps used:        " PRsa (5) "\n",
1277            SIZE_AMOUNT (s.num_ordinary_maps_used));
1278   fprintf (stderr, "Ordinary map used size:              " PRsa (5) "\n",
1279            SIZE_AMOUNT (s.ordinary_maps_used_size));
1280   fprintf (stderr, "Number of ordinary maps allocated:   " PRsa (5) "\n",
1281            SIZE_AMOUNT (s.num_ordinary_maps_allocated));
1282   fprintf (stderr, "Ordinary maps allocated size:        " PRsa (5) "\n",
1283            SIZE_AMOUNT (s.ordinary_maps_allocated_size));
1284   fprintf (stderr, "Number of macro maps used:           " PRsa (5) "\n",
1285            SIZE_AMOUNT (s.num_macro_maps_used));
1286   fprintf (stderr, "Macro maps used size:                " PRsa (5) "\n",
1287            SIZE_AMOUNT (s.macro_maps_used_size));
1288   fprintf (stderr, "Macro maps locations size:           " PRsa (5) "\n",
1289            SIZE_AMOUNT (s.macro_maps_locations_size));
1290   fprintf (stderr, "Macro maps size:                     " PRsa (5) "\n",
1291            SIZE_AMOUNT (macro_maps_size));
1292   fprintf (stderr, "Duplicated maps locations size:      " PRsa (5) "\n",
1293            SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
1294   fprintf (stderr, "Total allocated maps size:           " PRsa (5) "\n",
1295            SIZE_AMOUNT (total_allocated_map_size));
1296   fprintf (stderr, "Total used maps size:                " PRsa (5) "\n",
1297            SIZE_AMOUNT (total_used_map_size));
1298   fprintf (stderr, "Ad-hoc table size:                   " PRsa (5) "\n",
1299            SIZE_AMOUNT (s.adhoc_table_size));
1300   fprintf (stderr, "Ad-hoc table entries used:           " PRsa (5) "\n",
1301            SIZE_AMOUNT (s.adhoc_table_entries_used));
1302   fprintf (stderr, "optimized_ranges:                    " PRsa (5) "\n",
1303            SIZE_AMOUNT (line_table->num_optimized_ranges));
1304   fprintf (stderr, "unoptimized_ranges:                  " PRsa (5) "\n",
1305            SIZE_AMOUNT (line_table->num_unoptimized_ranges));
1306
1307   fprintf (stderr, "\n");
1308 }
1309
1310 /* Get location one beyond the final location in ordinary map IDX.  */
1311
1312 static location_t
1313 get_end_location (class line_maps *set, unsigned int idx)
1314 {
1315   if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1316     return set->highest_location;
1317
1318   struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1319   return MAP_START_LOCATION (next_map);
1320 }
1321
/* Helper function for write_digit_row.
   Write the decimal character for DIGIT modulo 10 to STREAM.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1329
1330 /* Helper function for dump_location_info.
1331    Write a row of numbers to STREAM, numbering a source line,
1332    giving the units, tens, hundreds etc of the column number.  */
1333
1334 static void
1335 write_digit_row (FILE *stream, int indent,
1336                  const line_map_ordinary *map,
1337                  location_t loc, int max_col, int divisor)
1338 {
1339   fprintf (stream, "%*c", indent, ' ');
1340   fprintf (stream, "|");
1341   for (int column = 1; column < max_col; column++)
1342     {
1343       location_t column_loc = loc + (column << map->m_range_bits);
1344       write_digit (stream, column_loc / divisor);
1345     }
1346   fprintf (stream, "\n");
1347 }
1348
1349 /* Write a half-closed (START) / half-open (END) interval of
1350    location_t to STREAM.  */
1351
1352 static void
1353 dump_location_range (FILE *stream,
1354                      location_t start, location_t end)
1355 {
1356   fprintf (stream,
1357            "  location_t interval: %u <= loc < %u\n",
1358            start, end);
1359 }
1360
1361 /* Write a labelled description of a half-closed (START) / half-open (END)
1362    interval of location_t to STREAM.  */
1363
1364 static void
1365 dump_labelled_location_range (FILE *stream,
1366                               const char *name,
1367                               location_t start, location_t end)
1368 {
1369   fprintf (stream, "%s\n", name);
1370   dump_location_range (stream, start, end);
1371   fprintf (stream, "\n");
1372 }
1373
1374 /* Write a visualization of the locations in the line_table to STREAM.  */
1375
1376 void
1377 dump_location_info (FILE *stream)
1378 {
1379   /* Visualize the reserved locations.  */
1380   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1381                                 0, RESERVED_LOCATION_COUNT);
1382
1383   /* Visualize the ordinary line_map instances, rendering the sources. */
1384   for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1385     {
1386       location_t end_location = get_end_location (line_table, idx);
1387       /* half-closed: doesn't include this one. */
1388
1389       const line_map_ordinary *map
1390         = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1391       fprintf (stream, "ORDINARY MAP: %i\n", idx);
1392       dump_location_range (stream,
1393                            MAP_START_LOCATION (map), end_location);
1394       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1395       fprintf (stream, "  starting at line: %i\n",
1396                ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1397       fprintf (stream, "  column and range bits: %i\n",
1398                map->m_column_and_range_bits);
1399       fprintf (stream, "  column bits: %i\n",
1400                map->m_column_and_range_bits - map->m_range_bits);
1401       fprintf (stream, "  range bits: %i\n",
1402                map->m_range_bits);
1403       const char * reason;
1404       switch (map->reason) {
1405       case LC_ENTER:
1406         reason = "LC_ENTER";
1407         break;
1408       case LC_LEAVE:
1409         reason = "LC_LEAVE";
1410         break;
1411       case LC_RENAME:
1412         reason = "LC_RENAME";
1413         break;
1414       case LC_RENAME_VERBATIM:
1415         reason = "LC_RENAME_VERBATIM";
1416         break;
1417       case LC_ENTER_MACRO:
1418         reason = "LC_RENAME_MACRO";
1419         break;
1420       default:
1421         reason = "Unknown";
1422       }
1423       fprintf (stream, "  reason: %d (%s)\n", map->reason, reason);
1424
1425       const line_map_ordinary *includer_map
1426         = linemap_included_from_linemap (line_table, map);
1427       fprintf (stream, "  included from location: %d",
1428                linemap_included_from (map));
1429       if (includer_map) {
1430         fprintf (stream, " (in ordinary map %d)",
1431                  int (includer_map - line_table->info_ordinary.maps));
1432       }
1433       fprintf (stream, "\n");
1434
1435       /* Render the span of source lines that this "map" covers.  */
1436       for (location_t loc = MAP_START_LOCATION (map);
1437            loc < end_location;
1438            loc += (1 << map->m_range_bits) )
1439         {
1440           gcc_assert (pure_location_p (line_table, loc) );
1441
1442           expanded_location exploc
1443             = linemap_expand_location (line_table, map, loc);
1444
1445           if (exploc.column == 0)
1446             {
1447               /* Beginning of a new source line: draw the line.  */
1448
1449               char_span line_text = location_get_source_line (exploc.file,
1450                                                               exploc.line);
1451               if (!line_text)
1452                 break;
1453               fprintf (stream,
1454                        "%s:%3i|loc:%5i|%.*s\n",
1455                        exploc.file, exploc.line,
1456                        loc,
1457                        (int)line_text.length (), line_text.get_buffer ());
1458
1459               /* "loc" is at column 0, which means "the whole line".
1460                  Render the locations *within* the line, by underlining
1461                  it, showing the location_t numeric values
1462                  at each column.  */
1463               size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1464               if (max_col > line_text.length ())
1465                 max_col = line_text.length () + 1;
1466
1467               int len_lnum = num_digits (exploc.line);
1468               if (len_lnum < 3)
1469                 len_lnum = 3;
1470               int len_loc = num_digits (loc);
1471               if (len_loc < 5)
1472                 len_loc = 5;
1473
1474               int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1475
1476               /* Thousands.  */
1477               if (end_location > 999)
1478                 write_digit_row (stream, indent, map, loc, max_col, 1000);
1479
1480               /* Hundreds.  */
1481               if (end_location > 99)
1482                 write_digit_row (stream, indent, map, loc, max_col, 100);
1483
1484               /* Tens.  */
1485               write_digit_row (stream, indent, map, loc, max_col, 10);
1486
1487               /* Units.  */
1488               write_digit_row (stream, indent, map, loc, max_col, 1);
1489             }
1490         }
1491       fprintf (stream, "\n");
1492     }
1493
1494   /* Visualize unallocated values.  */
1495   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1496                                 line_table->highest_location,
1497                                 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1498
1499   /* Visualize the macro line_map instances, rendering the sources. */
1500   for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1501     {
1502       /* Each macro map that is allocated owns location_t values
1503          that are *lower* that the one before them.
1504          Hence it's meaningful to view them either in order of ascending
1505          source locations, or in order of ascending macro map index.  */
1506       const bool ascending_location_ts = true;
1507       unsigned int idx = (ascending_location_ts
1508                           ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1509                           : i);
1510       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1511       fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1512                idx,
1513                linemap_map_get_macro_name (map),
1514                MACRO_MAP_NUM_MACRO_TOKENS (map));
1515       dump_location_range (stream,
1516                            map->start_location,
1517                            (map->start_location
1518                             + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1519       inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1520               "expansion point is location %i",
1521               MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1522       fprintf (stream, "  map->start_location: %u\n",
1523                map->start_location);
1524
1525       fprintf (stream, "  macro_locations:\n");
1526       for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1527         {
1528           location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1529           location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1530
1531           /* linemap_add_macro_token encodes token numbers in an expansion
1532              by putting them after MAP_START_LOCATION. */
1533
1534           /* I'm typically seeing 4 uninitialized entries at the end of
1535              0xafafafaf.
1536              This appears to be due to macro.cc:replace_args
1537              adding 2 extra args for padding tokens; presumably there may
1538              be a leading and/or trailing padding token injected,
1539              each for 2 more location slots.
1540              This would explain there being up to 4 location_ts slots
1541              that may be uninitialized.  */
1542
1543           fprintf (stream, "    %u: %u, %u\n",
1544                    i,
1545                    x,
1546                    y);
1547           if (x == y)
1548             {
1549               if (x < MAP_START_LOCATION (map))
1550                 inform (x, "token %u has %<x-location == y-location == %u%>",
1551                         i, x);
1552               else
1553                 fprintf (stream,
1554                          "x-location == y-location == %u encodes token # %u\n",
1555                          x, x - MAP_START_LOCATION (map));
1556                 }
1557           else
1558             {
1559               inform (x, "token %u has %<x-location == %u%>", i, x);
1560               inform (x, "token %u has %<y-location == %u%>", i, y);
1561             }
1562         }
1563       fprintf (stream, "\n");
1564     }
1565
1566   /* It appears that MAX_LOCATION_T itself is never assigned to a
1567      macro map, presumably due to an off-by-one error somewhere
1568      between the logic in linemap_enter_macro and
1569      LINEMAPS_MACRO_LOWEST_LOCATION.  */
1570   dump_labelled_location_range (stream, "MAX_LOCATION_T",
1571                                 MAX_LOCATION_T,
1572                                 MAX_LOCATION_T + 1);
1573
1574   /* Visualize ad-hoc values.  */
1575   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1576                                 MAX_LOCATION_T + 1, UINT_MAX);
1577 }
1578
1579 /* string_concat's constructor.  */
1580
1581 string_concat::string_concat (int num, location_t *locs)
1582   : m_num (num)
1583 {
1584   m_locs = ggc_vec_alloc <location_t> (num);
1585   for (int i = 0; i < num; i++)
1586     m_locs[i] = locs[i];
1587 }
1588
1589 /* string_concat_db's constructor.  */
1590
1591 string_concat_db::string_concat_db ()
1592 {
1593   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1594 }
1595
1596 /* Record that a string concatenation occurred, covering NUM
1597    string literal tokens.  LOCS is an array of size NUM, containing the
1598    locations of the tokens.  A copy of LOCS is taken.  */
1599
1600 void
1601 string_concat_db::record_string_concatenation (int num, location_t *locs)
1602 {
1603   gcc_assert (num > 1);
1604   gcc_assert (locs);
1605
1606   location_t key_loc = get_key_loc (locs[0]);
1607   /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:
1608      any data now recorded under key 'key_loc' would be overwritten by a
1609      subsequent call with the same key 'key_loc'.  */
1610   if (RESERVED_LOCATION_P (key_loc))
1611     return;
1612
1613   string_concat *concat
1614     = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1615   m_table->put (key_loc, concat);
1616 }
1617
1618 /* Determine if LOC was the location of the initial token of a
1619    concatenation of string literal tokens.
1620    If so, *OUT_NUM is written to with the number of tokens, and
1621    *OUT_LOCS with the location of an array of locations of the
1622    tokens, and return true.  *OUT_LOCS is a borrowed pointer to
1623    storage owned by the string_concat_db.
1624    Otherwise, return false.  */
1625
1626 bool
1627 string_concat_db::get_string_concatenation (location_t loc,
1628                                             int *out_num,
1629                                             location_t **out_locs)
1630 {
1631   gcc_assert (out_num);
1632   gcc_assert (out_locs);
1633
1634   location_t key_loc = get_key_loc (loc);
1635   /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see
1636      discussion in 'string_concat_db::record_string_concatenation'.  */
1637   if (RESERVED_LOCATION_P (key_loc))
1638     return false;
1639
1640   string_concat **concat = m_table->get (key_loc);
1641   if (!concat)
1642     return false;
1643
1644   *out_num = (*concat)->m_num;
1645   *out_locs =(*concat)->m_locs;
1646   return true;
1647 }
1648
1649 /* Internal function.  Canonicalize LOC into a form suitable for
1650    use as a key within the database, stripping away macro expansion,
1651    ad-hoc information, and range information, using the location of
1652    the start of LOC within an ordinary linemap.  */
1653
1654 location_t
1655 string_concat_db::get_key_loc (location_t loc)
1656 {
1657   loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1658                                   NULL);
1659
1660   loc = get_range_from_loc (line_table, loc).m_start;
1661
1662   return loc;
1663 }
1664
1665 /* Helper class for use within get_substring_ranges_for_loc.
1666    An vec of cpp_string with responsibility for releasing all of the
1667    str->text for each str in the vector.  */
1668
1669 class auto_cpp_string_vec :  public auto_vec <cpp_string>
1670 {
1671  public:
1672   auto_cpp_string_vec (int alloc)
1673     : auto_vec <cpp_string> (alloc) {}
1674
1675   ~auto_cpp_string_vec ()
1676   {
1677     /* Clean up the copies within this vec.  */
1678     int i;
1679     cpp_string *str;
1680     FOR_EACH_VEC_ELT (*this, i, str)
1681       free (const_cast <unsigned char *> (str->text));
1682   }
1683 };
1684
/* Attempt to populate RANGES with source location information on the
   individual characters within the string literal found at STRLOC.
   If CONCATS is non-NULL, then any string literals that the token at
   STRLOC was concatenated with are also added to RANGES.

   PFILE must be non-NULL (this is asserted).  TYPE is passed through
   unchanged to cpp_interpret_string_ranges.

   Return NULL if successful, or an error message if any errors occurred (in
   which case RANGES may be only partially populated and should not
   be used).

   This is implemented by re-parsing the relevant source line(s): for each
   literal token, the text of the token is copied out of its source line
   and handed, together with a location reader positioned at the start of
   the token, to cpp_interpret_string_ranges.  */

static const char *
get_substring_ranges_for_loc (cpp_reader *pfile,
                              string_concat_db *concats,
                              location_t strloc,
                              enum cpp_ttype type,
                              cpp_substring_ranges &ranges)
{
  gcc_assert (pfile);

  if (strloc == UNKNOWN_LOCATION)
    return "unknown location";

  /* Reparsing the strings requires accurate location information.
     If -ftrack-macro-expansion has been overridden from its default
     of 2, then we might have a location of a macro expansion point,
     rather than the location of the literal itself.
     Avoid this by requiring that we have full macro expansion tracking
     for substring locations to be available.  */
  if (cpp_get_options (pfile)->track_macro_expansion != 2)
    return "track_macro_expansion != 2";

  /* If #line or # 44 "file"-style directives are present, then there's
     no guarantee that the line numbers we have can be used to locate
     the strings.  For example, we might have a .i file with # directives
     pointing back to lines within a .c file, but the .c file might
     have been edited since the .i file was created.
     In such a case, the safest course is to disable on-demand substring
     locations.  */
  if (line_table->seen_line_directive)
    return "seen line directive";

  /* If string concatenation has occurred at STRLOC, get the locations
     of all of the literal tokens making up the compound string.
     Otherwise, just use STRLOC.
     NUM_LOCS and STRLOCS are pre-set to describe the single location
     STRLOC before CONCATS is consulted.  */
  int num_locs = 1;
  location_t *strlocs = &strloc;
  if (concats)
    concats->get_string_concatenation (strloc, &num_locs, &strlocs);

  /* STRS holds a copy of the text of each literal token; LOC_READERS holds
     the matching location reader.  Both are filled in lockstep below, one
     entry per element of STRLOCS.  */
  auto_cpp_string_vec strs (num_locs);
  auto_vec <cpp_string_location_reader> loc_readers (num_locs);
  for (int i = 0; i < num_locs; i++)
    {
      /* Get range of strloc.  We will use it to locate the start and finish
         of the literal token within the line.  */
      source_range src_range = get_range_from_loc (line_table, strlocs[i]);

      if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
        {
          /* If the string token was within a macro expansion, then we can
             cope with it for the simple case where we have a single token.
             Otherwise, bail out.  */
          if (src_range.m_start != src_range.m_finish)
            return "macro expansion";
        }
      else
        {
          if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
            /* If so, we can't reliably determine where the token started within
               its line.  */
            return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";

          if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
            /* If so, we can't reliably determine where the token finished
               within its line.  */
            return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
        }

      /* Expand both endpoints to file/line/column form, and require that
         they describe a forward-running span within a single line of a
         single file.  */
      expanded_location start
        = expand_location_to_spelling_point (src_range.m_start,
                                             LOCATION_ASPECT_START);
      expanded_location finish
        = expand_location_to_spelling_point (src_range.m_finish,
                                             LOCATION_ASPECT_FINISH);
      if (start.file != finish.file)
        return "range endpoints are in different files";
      if (start.line != finish.line)
        return "range endpoints are on different lines";
      if (start.column > finish.column)
        return "range endpoints are reversed";

      char_span line = location_get_source_line (start.file, start.line);
      if (!line)
        return "unable to read source line";

      /* Determine the location of the literal (including quotes
         and leading prefix chars, such as the 'u' in a u""
         token).  Both endpoint columns are inclusive, hence the "+ 1".  */
      size_t literal_length = finish.column - start.column + 1;

      /* Ensure that we don't crash if we got the wrong location.  */
      if (start.column < 1)
        return "zero start column";
      if (line.length () < (start.column - 1 + literal_length))
        return "line is not wide enough";

      /* The subspan offset is START.COLUMN - 1; the check above rejects
         START.COLUMN values below 1.  */
      char_span literal = line.subspan (start.column - 1, literal_length);

      cpp_string from;
      from.len = literal_length;
      /* Make a copy of the literal, to avoid having to rely on
         the lifetime of the copy of the line within the cache.
         This will be released by the auto_cpp_string_vec dtor.  */
      from.text = (unsigned char *)literal.xstrdup ();
      strs.safe_push (from);

      /* For very long lines, a new linemap could have started
         halfway through the token.
         Ensure that the loc_reader uses the linemap of the
         *end* of the token for its start location.  */
      const line_map_ordinary *start_ord_map;
      linemap_resolve_location (line_table, src_range.m_start,
                                LRK_SPELLING_LOCATION, &start_ord_map);
      const line_map_ordinary *final_ord_map;
      linemap_resolve_location (line_table, src_range.m_finish,
                                LRK_SPELLING_LOCATION, &final_ord_map);
      if (start_ord_map == NULL || final_ord_map == NULL)
        return "failed to get ordinary maps";
      /* Bulletproofing.  We ought to only have different ordinary maps
         for start vs finish due to line-length jumps.  */
      if (start_ord_map != final_ord_map
          && start_ord_map->to_file != final_ord_map->to_file)
        return "start and finish are spelled in different ordinary maps";
      /* The file from linemap_resolve_location ought to match that from
         expand_location_to_spelling_point.  */
      if (start_ord_map->to_file != start.file)
        return "mismatching file after resolving linemap";

      /* Re-express the start of the token as a position within
         FINAL_ORD_MAP, and use that as the starting point of the reader.  */
      location_t start_loc
        = linemap_position_for_line_and_column (line_table, final_ord_map,
                                                start.line, start.column);

      cpp_string_location_reader loc_reader (start_loc, line_table);
      loc_readers.safe_push (loc_reader);
    }

  /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
  const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
                                                 loc_readers.address (),
                                                 num_locs, &ranges, type);
  if (err)
    return err;

  /* Success: "ranges" should now contain information on the string.  */
  return NULL;
}
1842
1843 /* Attempt to populate *OUT_LOC with source location information on the
1844    given characters within the string literal found at STRLOC.
1845    CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1846    character set.
1847
1848    For example, given CARET_IDX = 4, START_IDX = 3, END_IDX  = 7
1849    and string literal "012345\n789"
1850    *OUT_LOC is written to with:
1851      "012345\n789"
1852          ~^~~~~
1853
1854    If CONCATS is non-NULL, then any string literals that the token at
1855    STRLOC was concatenated with are also considered.
1856
1857    This is implemented by re-parsing the relevant source line(s).
1858
1859    Return NULL if successful, or an error message if any errors occurred.
1860    Error messages are intended for GCC developers (to help debugging) rather
1861    than for end-users.  */
1862
1863 const char *
1864 get_location_within_string (cpp_reader *pfile,
1865                             string_concat_db *concats,
1866                             location_t strloc,
1867                             enum cpp_ttype type,
1868                             int caret_idx, int start_idx, int end_idx,
1869                             location_t *out_loc)
1870 {
1871   gcc_checking_assert (caret_idx >= 0);
1872   gcc_checking_assert (start_idx >= 0);
1873   gcc_checking_assert (end_idx >= 0);
1874   gcc_assert (out_loc);
1875
1876   cpp_substring_ranges ranges;
1877   const char *err
1878     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1879   if (err)
1880     return err;
1881
1882   if (caret_idx >= ranges.get_num_ranges ())
1883     return "caret_idx out of range";
1884   if (start_idx >= ranges.get_num_ranges ())
1885     return "start_idx out of range";
1886   if (end_idx >= ranges.get_num_ranges ())
1887     return "end_idx out of range";
1888
1889   *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1890                             ranges.get_range (start_idx).m_start,
1891                             ranges.get_range (end_idx).m_finish);
1892   return NULL;
1893 }
1894
1895 /* Associate the DISCRIMINATOR with LOCUS, and return a new locus. */
1896
1897 location_t
1898 location_with_discriminator (location_t locus, int discriminator)
1899 {
1900   tree block = LOCATION_BLOCK (locus);
1901   source_range src_range = get_range_from_loc (line_table, locus);
1902   locus = get_pure_location (locus);
1903
1904   if (locus == UNKNOWN_LOCATION)
1905     return locus;
1906
1907   return COMBINE_LOCATION_DATA (line_table, locus, src_range, block, discriminator);
1908 }
1909
1910 /* Return TRUE if LOCUS represents a location with a discriminator.  */
1911
1912 bool
1913 has_discriminator (location_t locus)
1914 {
1915   return get_discriminator_from_loc (locus) != 0;
1916 }
1917
/* Return the discriminator for LOCUS.
   This is a convenience overload that forwards to the two-argument
   get_discriminator_from_loc, passing the global line_table.  */

int
get_discriminator_from_loc (location_t locus)
{
  return get_discriminator_from_loc (line_table, locus);
}
1925
1926 #if CHECKING_P
1927
1928 namespace selftest {
1929
1930 /* Selftests of location handling.  */
1931
1932 /* Attempt to populate *OUT_RANGE with source location information on the
1933    given character within the string literal found at STRLOC.
1934    CHAR_IDX refers to an offset within the execution character set.
1935    If CONCATS is non-NULL, then any string literals that the token at
1936    STRLOC was concatenated with are also considered.
1937
1938    This is implemented by re-parsing the relevant source line(s).
1939
1940    Return NULL if successful, or an error message if any errors occurred.
1941    Error messages are intended for GCC developers (to help debugging) rather
1942    than for end-users.  */
1943
1944 static const char *
1945 get_source_range_for_char (cpp_reader *pfile,
1946                            string_concat_db *concats,
1947                            location_t strloc,
1948                            enum cpp_ttype type,
1949                            int char_idx,
1950                            source_range *out_range)
1951 {
1952   gcc_checking_assert (char_idx >= 0);
1953   gcc_assert (out_range);
1954
1955   cpp_substring_ranges ranges;
1956   const char *err
1957     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1958   if (err)
1959     return err;
1960
1961   if (char_idx >= ranges.get_num_ranges ())
1962     return "char_idx out of range";
1963
1964   *out_range = ranges.get_range (char_idx);
1965   return NULL;
1966 }
1967
1968 /* As get_source_range_for_char, but write to *OUT the number
1969    of ranges that are available.  */
1970
1971 static const char *
1972 get_num_source_ranges_for_substring (cpp_reader *pfile,
1973                                      string_concat_db *concats,
1974                                      location_t strloc,
1975                                      enum cpp_ttype type,
1976                                      int *out)
1977 {
1978   gcc_assert (out);
1979
1980   cpp_substring_ranges ranges;
1981   const char *err
1982     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1983
1984   if (err)
1985     return err;
1986
1987   *out = ranges.get_num_ranges ();
1988   return NULL;
1989 }
1990
1991 /* Selftests of location handling.  */
1992
1993 /* Verify that compare() on linenum_type handles comparisons over the full
1994    range of the type.  */
1995
1996 static void
1997 test_linenum_comparisons ()
1998 {
1999   linenum_type min_line (0);
2000   linenum_type max_line (0xffffffff);
2001   ASSERT_EQ (0, compare (min_line, min_line));
2002   ASSERT_EQ (0, compare (max_line, max_line));
2003
2004   ASSERT_GT (compare (max_line, min_line), 0);
2005   ASSERT_LT (compare (min_line, max_line), 0);
2006 }
2007
2008 /* Helper function for verifying location data: when location_t
2009    values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
2010    as having column 0.  */
2011
2012 static bool
2013 should_have_column_data_p (location_t loc)
2014 {
2015   if (IS_ADHOC_LOC (loc))
2016     loc = get_location_from_adhoc_loc (line_table, loc);
2017   if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
2018     return false;
2019   return true;
2020 }
2021
2022 /* Selftest for should_have_column_data_p.  */
2023
2024 static void
2025 test_should_have_column_data_p ()
2026 {
2027   ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
2028   ASSERT_TRUE
2029     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
2030   ASSERT_FALSE
2031     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
2032 }
2033
2034 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
2035    on LOC.  */
2036
2037 static void
2038 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
2039               location_t loc)
2040 {
2041   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
2042   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
2043   /* If location_t values are sufficiently high, then column numbers
2044      will be unavailable and LOCATION_COLUMN (loc) will be 0.
2045      When close to the threshold, column numbers *may* be present: if
2046      the final linemap before the threshold contains a line that straddles
2047      the threshold, locations in that line have column information.  */
2048   if (should_have_column_data_p (loc))
2049     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
2050 }
2051
/* Many of the selftests build a line table and populate it with one or
   more line maps.

   To maximize coverage we want to run such tests under a variety of
   conditions:
   - line_table->default_range_bits: some frontends use a non-zero value
   and others use zero
   - the fallback modes within line-map.cc: there are various threshold
   values for location_t beyond which line-map.cc changes
   behavior (disabling of the range-packing optimization, disabling
   of column-tracking).  We can exercise these by starting the line_table
   at interesting values at or near these thresholds.

   Each instance of the following class describes one cell of that test
   matrix.  */

class line_table_case
{
public:
  /* The value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the value at which the line_table should start.  */
  int m_base_location;

  line_table_case (int default_range_bits, int base_location)
  : m_default_range_bits (default_range_bits),
    m_base_location (base_location)
  {
  }
};
2079
2080 /* Constructor.  Store the old value of line_table, and create a new
2081    one, using sane defaults.  */
2082
2083 line_table_test::line_table_test ()
2084 {
2085   gcc_assert (saved_line_table == NULL);
2086   saved_line_table = line_table;
2087   line_table = ggc_alloc<line_maps> ();
2088   linemap_init (line_table, BUILTINS_LOCATION);
2089   gcc_assert (saved_line_table->reallocator);
2090   line_table->reallocator = saved_line_table->reallocator;
2091   gcc_assert (saved_line_table->round_alloc_size);
2092   line_table->round_alloc_size = saved_line_table->round_alloc_size;
2093   line_table->default_range_bits = 0;
2094 }
2095
2096 /* Constructor.  Store the old value of line_table, and create a new
2097    one, using the sitation described in CASE_.  */
2098
2099 line_table_test::line_table_test (const line_table_case &case_)
2100 {
2101   gcc_assert (saved_line_table == NULL);
2102   saved_line_table = line_table;
2103   line_table = ggc_alloc<line_maps> ();
2104   linemap_init (line_table, BUILTINS_LOCATION);
2105   gcc_assert (saved_line_table->reallocator);
2106   line_table->reallocator = saved_line_table->reallocator;
2107   gcc_assert (saved_line_table->round_alloc_size);
2108   line_table->round_alloc_size = saved_line_table->round_alloc_size;
2109   line_table->default_range_bits = case_.m_default_range_bits;
2110   if (case_.m_base_location)
2111     {
2112       line_table->highest_location = case_.m_base_location;
2113       line_table->highest_line = case_.m_base_location;
2114     }
2115 }
2116
2117 /* Destructor.  Restore the old value of line_table.  */
2118
2119 line_table_test::~line_table_test ()
2120 {
2121   gcc_assert (saved_line_table != NULL);
2122   line_table = saved_line_table;
2123   saved_line_table = NULL;
2124 }
2125
2126 /* Verify basic operation of ordinary linemaps.  */
2127
2128 static void
2129 test_accessing_ordinary_linemaps (const line_table_case &case_)
2130 {
2131   line_table_test ltt (case_);
2132
2133   /* Build a simple linemap describing some locations. */
2134   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
2135
2136   linemap_line_start (line_table, 1, 100);
2137   location_t loc_a = linemap_position_for_column (line_table, 1);
2138   location_t loc_b = linemap_position_for_column (line_table, 23);
2139
2140   linemap_line_start (line_table, 2, 100);
2141   location_t loc_c = linemap_position_for_column (line_table, 1);
2142   location_t loc_d = linemap_position_for_column (line_table, 17);
2143
2144   /* Example of a very long line.  */
2145   linemap_line_start (line_table, 3, 2000);
2146   location_t loc_e = linemap_position_for_column (line_table, 700);
2147
2148   /* Transitioning back to a short line.  */
2149   linemap_line_start (line_table, 4, 0);
2150   location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
2151
2152   if (should_have_column_data_p (loc_back_to_short))
2153     {
2154       /* Verify that we switched to short lines in the linemap.  */
2155       line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
2156       ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
2157     }
2158
2159   /* Example of a line that will eventually be seen to be longer
2160      than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
2161      below that.  */
2162   linemap_line_start (line_table, 5, 2000);
2163
2164   location_t loc_start_of_very_long_line
2165     = linemap_position_for_column (line_table, 2000);
2166   location_t loc_too_wide
2167     = linemap_position_for_column (line_table, 4097);
2168   location_t loc_too_wide_2
2169     = linemap_position_for_column (line_table, 4098);
2170
2171   /* ...and back to a sane line length.  */
2172   linemap_line_start (line_table, 6, 100);
2173   location_t loc_sane_again = linemap_position_for_column (line_table, 10);
2174
2175   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
2176
2177   /* Multiple files.  */
2178   linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
2179   linemap_line_start (line_table, 1, 200);
2180   location_t loc_f = linemap_position_for_column (line_table, 150);
2181   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
2182
2183   /* Verify that we can recover the location info.  */
2184   assert_loceq ("foo.c", 1, 1, loc_a);
2185   assert_loceq ("foo.c", 1, 23, loc_b);
2186   assert_loceq ("foo.c", 2, 1, loc_c);
2187   assert_loceq ("foo.c", 2, 17, loc_d);
2188   assert_loceq ("foo.c", 3, 700, loc_e);
2189   assert_loceq ("foo.c", 4, 100, loc_back_to_short);
2190
2191   /* In the very wide line, the initial location should be fully tracked.  */
2192   assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
2193   /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
2194      be disabled.  */
2195   assert_loceq ("foo.c", 5, 0, loc_too_wide);
2196   assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
2197   /*...and column-tracking should be re-enabled for subsequent lines.  */
2198   assert_loceq ("foo.c", 6, 10, loc_sane_again);
2199
2200   assert_loceq ("bar.c", 1, 150, loc_f);
2201
2202   ASSERT_FALSE (is_location_from_builtin_token (loc_a));
2203   ASSERT_TRUE (pure_location_p (line_table, loc_a));
2204
2205   /* Verify using make_location to build a range, and extracting data
2206      back from it.  */
2207   location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
2208   ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
2209   ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
2210   source_range src_range = get_range_from_loc (line_table, range_c_b_d);
2211   ASSERT_EQ (loc_b, src_range.m_start);
2212   ASSERT_EQ (loc_d, src_range.m_finish);
2213 }
2214
2215 /* Verify various properties of UNKNOWN_LOCATION.  */
2216
2217 static void
2218 test_unknown_location ()
2219 {
2220   ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
2221   ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
2222   ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
2223 }
2224
2225 /* Verify various properties of BUILTINS_LOCATION.  */
2226
2227 static void
2228 test_builtins ()
2229 {
2230   assert_loceq (special_fname_builtin (), 0, 0, BUILTINS_LOCATION);
2231   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
2232 }
2233
2234 /* Regression test for make_location.
2235    Ensure that we use pure locations for the start/finish of the range,
2236    rather than storing a packed or ad-hoc range as the start/finish.  */
2237
2238 static void
2239 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
2240 {
2241   /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
2242      with C++ frontend.
2243      ....................0000000001111111111222.
2244      ....................1234567890123456789012.  */
2245   const char *content = "     r += !aaa == bbb;\n";
2246   temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
2247   line_table_test ltt (case_);
2248   linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
2249
2250   const location_t c11 = linemap_position_for_column (line_table, 11);
2251   const location_t c12 = linemap_position_for_column (line_table, 12);
2252   const location_t c13 = linemap_position_for_column (line_table, 13);
2253   const location_t c14 = linemap_position_for_column (line_table, 14);
2254   const location_t c21 = linemap_position_for_column (line_table, 21);
2255
2256   if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
2257     return;
2258
2259   /* Use column 13 for the caret location, arbitrarily, to verify that we
2260      handle start != caret.  */
2261   const location_t aaa = make_location (c13, c12, c14);
2262   ASSERT_EQ (c13, get_pure_location (aaa));
2263   ASSERT_EQ (c12, get_start (aaa));
2264   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
2265   ASSERT_EQ (c14, get_finish (aaa));
2266   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
2267
2268   /* Make a location using a location with a range as the start-point.  */
2269   const location_t not_aaa = make_location (c11, aaa, c14);
2270   ASSERT_EQ (c11, get_pure_location (not_aaa));
2271   /* It should use the start location of the range, not store the range
2272      itself.  */
2273   ASSERT_EQ (c12, get_start (not_aaa));
2274   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
2275   ASSERT_EQ (c14, get_finish (not_aaa));
2276   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
2277
2278   /* Similarly, make a location with a range as the end-point.  */
2279   const location_t aaa_eq_bbb = make_location (c12, c12, c21);
2280   ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
2281   ASSERT_EQ (c12, get_start (aaa_eq_bbb));
2282   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
2283   ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
2284   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
2285   const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
2286   /* It should use the finish location of the range, not store the range
2287      itself.  */
2288   ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
2289   ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
2290   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
2291   ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
2292   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
2293 }
2294
2295 /* Verify reading of input files (e.g. for caret-based diagnostics).  */
2296
2297 static void
2298 test_reading_source_line ()
2299 {
2300   /* Create a tempfile and write some text to it.  */
2301   temp_source_file tmp (SELFTEST_LOCATION, ".txt",
2302                         "01234567890123456789\n"
2303                         "This is the test text\n"
2304                         "This is the 3rd line");
2305
2306   /* Read back a specific line from the tempfile.  */
2307   char_span source_line = location_get_source_line (tmp.get_filename (), 3);
2308   ASSERT_TRUE (source_line);
2309   ASSERT_TRUE (source_line.get_buffer () != NULL);
2310   ASSERT_EQ (20, source_line.length ());
2311   ASSERT_TRUE (!strncmp ("This is the 3rd line",
2312                          source_line.get_buffer (), source_line.length ()));
2313
2314   source_line = location_get_source_line (tmp.get_filename (), 2);
2315   ASSERT_TRUE (source_line);
2316   ASSERT_TRUE (source_line.get_buffer () != NULL);
2317   ASSERT_EQ (21, source_line.length ());
2318   ASSERT_TRUE (!strncmp ("This is the test text",
2319                          source_line.get_buffer (), source_line.length ()));
2320
2321   source_line = location_get_source_line (tmp.get_filename (), 4);
2322   ASSERT_FALSE (source_line);
2323   ASSERT_TRUE (source_line.get_buffer () == NULL);
2324 }
2325
2326 /* Tests of lexing.  */
2327
/* Check that running cpp_token_as_text on token TOK from PARSER gives
   a string equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    const char *spelling_                                               \
      = (const char *)cpp_token_as_text ((PARSER), (TOK));              \
    ASSERT_STREQ ((EXPECTED_TEXT), spelling_);                          \
  SELFTEST_END_STMT
2336
2337 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
2338    and ranges from EXP_START_COL to EXP_FINISH_COL.
2339    Use LOC as the effective location of the selftest.  */
2340
2341 static void
2342 assert_token_loc_eq (const location &loc,
2343                      const cpp_token *tok,
2344                      const char *exp_filename, int exp_linenum,
2345                      int exp_start_col, int exp_finish_col)
2346 {
2347   location_t tok_loc = tok->src_loc;
2348   ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
2349   ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
2350
2351   /* If location_t values are sufficiently high, then column numbers
2352      will be unavailable.  */
2353   if (!should_have_column_data_p (tok_loc))
2354     return;
2355
2356   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2357   source_range tok_range = get_range_from_loc (line_table, tok_loc);
2358   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2359   ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
2360 }
2361
/* Use assert_token_loc_eq to verify the TOK->src_loc, using
   SELFTEST_LOCATION as the effective location of the selftest.
   The remaining arguments (expected filename, line number, and start
   and finish columns) are forwarded unchanged; being a macro, the
   SELFTEST_LOCATION is expanded at the point of use.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
                            EXP_START_COL, EXP_FINISH_COL) \
  assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
                       (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
2369
2370 /* Test of lexing a file using libcpp, verifying tokens and their
2371    location information.  */
2372
2373 static void
2374 test_lexer (const line_table_case &case_)
2375 {
2376   /* Create a tempfile and write some text to it.  */
2377   const char *content =
2378     /*00000000011111111112222222222333333.3333444444444.455555555556
2379       12345678901234567890123456789012345.6789012345678.901234567890.  */
2380     ("test_name /* c-style comment */\n"
2381      "                                  \"test literal\"\n"
2382      " // test c++-style comment\n"
2383      "   42\n");
2384   temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2385
2386   line_table_test ltt (case_);
2387
2388   cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2389
2390   const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2391   ASSERT_NE (fname, NULL);
2392
2393   /* Verify that we get the expected tokens back, with the correct
2394      location information.  */
2395
2396   location_t loc;
2397   const cpp_token *tok;
2398   tok = cpp_get_token_with_location (parser, &loc);
2399   ASSERT_NE (tok, NULL);
2400   ASSERT_EQ (tok->type, CPP_NAME);
2401   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2402   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2403
2404   tok = cpp_get_token_with_location (parser, &loc);
2405   ASSERT_NE (tok, NULL);
2406   ASSERT_EQ (tok->type, CPP_STRING);
2407   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2408   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2409
2410   tok = cpp_get_token_with_location (parser, &loc);
2411   ASSERT_NE (tok, NULL);
2412   ASSERT_EQ (tok->type, CPP_NUMBER);
2413   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2414   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2415
2416   tok = cpp_get_token_with_location (parser, &loc);
2417   ASSERT_NE (tok, NULL);
2418   ASSERT_EQ (tok->type, CPP_EOF);
2419
2420   cpp_finish (parser, NULL);
2421   cpp_destroy (parser);
2422 }
2423
2424 /* Forward decls.  */
2425
2426 class lexer_test;
2427 class lexer_test_options;
2428
/* A class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor.
   This is an abstract interface: subclasses implement "apply" to adjust
   the lexer_test they are given.  */

class lexer_test_options
{
 public:
  /* Hook for subclasses to configure the given lexer_test.  */
  virtual void apply (lexer_test &) = 0;
};
2437
2438 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2439    in its dtor.
2440
2441    This is needed by struct lexer_test to ensure that the cleanup of the
2442    cpp_reader happens *after* the cleanup of the temp_source_file.  */
2443
2444 class cpp_reader_ptr
2445 {
2446  public:
2447   cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2448
2449   ~cpp_reader_ptr ()
2450   {
2451     cpp_finish (m_ptr, NULL);
2452     cpp_destroy (m_ptr);
2453   }
2454
2455   operator cpp_reader * () const { return m_ptr; }
2456
2457  private:
2458   cpp_reader *m_ptr;
2459 };
2460
/* A class for writing lexer tests.
   It bundles together a line_table_test, a cpp_reader, a temp_source_file
   and a string_concat_db; tokens are fetched via get_token.  The
   constructor and the other member functions are defined out of line.  */

class lexer_test
{
public:
  /* CASE_ selects the line-table configuration and CONTENT is the source
     text to lex.  OPTIONS is a lexer_test_options whose "apply" vfunc is
     called during construction.  */
  lexer_test (const line_table_case &case_, const char *content,
              lexer_test_options *options);
  ~lexer_test ();

  const cpp_token *get_token ();

  /* The ordering of these fields matters (C++ destroys data members in
     the reverse of their declaration order).
     The line_table_test must be first, since the cpp_reader_ptr
     uses it.
     The cpp_reader must be cleaned up *after* the temp_source_file
     since the filenames in input.cc's input cache are owned by the
     cpp_reader; in particular, when ~temp_source_file evicts the
     filename the filenames must still be alive.  */
  line_table_test m_ltt;
  cpp_reader_ptr m_parser;
  temp_source_file m_tempfile;
  string_concat_db m_concats;
  bool m_implicitly_expect_EOF;
};
2485
2486 /* Use an EBCDIC encoding for the execution charset, specifically
2487    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2488
2489    This exercises iconv integration within libcpp.
2490    Not every build of iconv supports the given charset,
2491    so we need to flag this error and handle it gracefully.  */
2492
2493 class ebcdic_execution_charset : public lexer_test_options
2494 {
2495  public:
2496   ebcdic_execution_charset () : m_num_iconv_errors (0)
2497     {
2498       gcc_assert (s_singleton == NULL);
2499       s_singleton = this;
2500     }
2501   ~ebcdic_execution_charset ()
2502     {
2503       gcc_assert (s_singleton == this);
2504       s_singleton = NULL;
2505     }
2506
2507   void apply (lexer_test &test) final override
2508   {
2509     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2510     cpp_opts->narrow_charset = "IBM1047";
2511
2512     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2513     callbacks->diagnostic = on_diagnostic;
2514   }
2515
2516   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2517                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2518                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2519                              rich_location *richloc ATTRIBUTE_UNUSED,
2520                              const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2521     ATTRIBUTE_FPTR_PRINTF(5,0)
2522   {
2523     gcc_assert (s_singleton);
2524     /* Avoid exgettext from picking this up, it is translated in libcpp.  */
2525     const char *msg = "conversion from %s to %s not supported by iconv";
2526 #ifdef ENABLE_NLS
2527     msg = dgettext ("cpplib", msg);
2528 #endif
2529     /* Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc
2530        when the local iconv build doesn't support the conversion.  */
2531     if (strcmp (msgid, msg) == 0)
2532       {
2533         s_singleton->m_num_iconv_errors++;
2534         return true;
2535       }
2536
2537     /* Otherwise, we have an unexpected error.  */
2538     abort ();
2539   }
2540
2541   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2542
2543  private:
2544   static ebcdic_execution_charset *s_singleton;
2545   int m_num_iconv_errors;
2546 };
2547
2548 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2549
2550 /* A lexer_test_options subclass that records a list of diagnostic
2551    messages emitted by the lexer.  */
2552
2553 class lexer_diagnostic_sink : public lexer_test_options
2554 {
2555  public:
2556   lexer_diagnostic_sink ()
2557   {
2558     gcc_assert (s_singleton == NULL);
2559     s_singleton = this;
2560   }
2561   ~lexer_diagnostic_sink ()
2562   {
2563     gcc_assert (s_singleton == this);
2564     s_singleton = NULL;
2565
2566     int i;
2567     char *str;
2568     FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2569       free (str);
2570   }
2571
2572   void apply (lexer_test &test) final override
2573   {
2574     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2575     callbacks->diagnostic = on_diagnostic;
2576   }
2577
2578   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2579                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2580                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2581                              rich_location *richloc ATTRIBUTE_UNUSED,
2582                              const char *msgid, va_list *ap)
2583     ATTRIBUTE_FPTR_PRINTF(5,0)
2584   {
2585     char *msg = xvasprintf (msgid, *ap);
2586     s_singleton->m_diagnostics.safe_push (msg);
2587     return true;
2588   }
2589
2590   auto_vec<char *> m_diagnostics;
2591
2592  private:
2593   static lexer_diagnostic_sink *s_singleton;
2594 };
2595
2596 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2597
2598 /* Constructor.  Override line_table with a new instance based on CASE_,
2599    and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
2600    start parsing the tempfile.  */
2601
2602 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2603                         lexer_test_options *options)
2604 : m_ltt (case_),
2605   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2606   /* Create a tempfile and write the text to it.  */
2607   m_tempfile (SELFTEST_LOCATION, ".c", content),
2608   m_concats (),
2609   m_implicitly_expect_EOF (true)
2610 {
2611   if (options)
2612     options->apply (*this);
2613
2614   cpp_init_iconv (m_parser);
2615
2616   /* Parse the file.  */
2617   const char *fname = cpp_read_main_file (m_parser,
2618                                           m_tempfile.get_filename ());
2619   ASSERT_NE (fname, NULL);
2620 }
2621
2622 /* Destructor.  By default, verify that the next token in m_parser is EOF.  */
2623
2624 lexer_test::~lexer_test ()
2625 {
2626   location_t loc;
2627   const cpp_token *tok;
2628
2629   if (m_implicitly_expect_EOF)
2630     {
2631       tok = cpp_get_token_with_location (m_parser, &loc);
2632       ASSERT_NE (tok, NULL);
2633       ASSERT_EQ (tok->type, CPP_EOF);
2634     }
2635 }
2636
2637 /* Get the next token from m_parser.  */
2638
2639 const cpp_token *
2640 lexer_test::get_token ()
2641 {
2642   location_t loc;
2643   const cpp_token *tok;
2644
2645   tok = cpp_get_token_with_location (m_parser, &loc);
2646   ASSERT_NE (tok, NULL);
2647   return tok;
2648 }
2649
2650 /* Verify that locations within string literals are correctly handled.  */
2651
2652 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2653    using the string concatenation database for TEST.
2654
2655    Assert that the character at index IDX is on EXPECTED_LINE,
2656    and that it begins at column EXPECTED_START_COL and ends at
2657    EXPECTED_FINISH_COL (unless the locations are beyond
2658    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2659    columns).  */
2660
2661 static void
2662 assert_char_at_range (const location &loc,
2663                       lexer_test& test,
2664                       location_t strloc, enum cpp_ttype type, int idx,
2665                       int expected_line, int expected_start_col,
2666                       int expected_finish_col)
2667 {
2668   cpp_reader *pfile = test.m_parser;
2669   string_concat_db *concats = &test.m_concats;
2670
2671   source_range actual_range = source_range();
2672   const char *err
2673     = get_source_range_for_char (pfile, concats, strloc, type, idx,
2674                                  &actual_range);
2675   if (should_have_column_data_p (strloc))
2676     ASSERT_EQ_AT (loc, NULL, err);
2677   else
2678     {
2679       ASSERT_STREQ_AT (loc,
2680                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2681                        err);
2682       return;
2683     }
2684
2685   int actual_start_line = LOCATION_LINE (actual_range.m_start);
2686   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2687   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2688   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2689
2690   if (should_have_column_data_p (actual_range.m_start))
2691     {
2692       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2693       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2694     }
2695   if (should_have_column_data_p (actual_range.m_finish))
2696     {
2697       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2698       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2699     }
2700 }
2701
/* Convenience wrapper for assert_char_at_range, which passes
   SELFTEST_LOCATION so that any failure is reported at the site of the
   macro invocation.  */

#define ASSERT_CHAR_AT_RANGE(TEST, STRLOC, TYPE, IDX, LINE,             \
                             START_COL, FINISH_COL)                     \
  assert_char_at_range (SELFTEST_LOCATION, (TEST), (STRLOC), (TYPE),    \
                        (IDX), (LINE), (START_COL), (FINISH_COL))
2710
2711 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2712    using the string concatenation database for TEST.
2713
2714    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES.  */
2715
2716 static void
2717 assert_num_substring_ranges (const location &loc,
2718                              lexer_test& test,
2719                              location_t strloc,
2720                              enum cpp_ttype type,
2721                              int expected_num_ranges)
2722 {
2723   cpp_reader *pfile = test.m_parser;
2724   string_concat_db *concats = &test.m_concats;
2725
2726   int actual_num_ranges = -1;
2727   const char *err
2728     = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2729                                            &actual_num_ranges);
2730   if (should_have_column_data_p (strloc))
2731     ASSERT_EQ_AT (loc, NULL, err);
2732   else
2733     {
2734       ASSERT_STREQ_AT (loc,
2735                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2736                        err);
2737       return;
2738     }
2739   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2740 }
2741
/* Convenience wrapper for assert_num_substring_ranges, which passes
   SELFTEST_LOCATION so that any failure is reported at the site of the
   macro invocation.  */

#define ASSERT_NUM_SUBSTRING_RANGES(TEST, STRLOC, TYPE, NUM_RANGES)     \
  assert_num_substring_ranges (SELFTEST_LOCATION, (TEST), (STRLOC),     \
                               (TYPE), (NUM_RANGES))
2749
2750
2751 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2752    returns an error (using the string concatenation database for TEST).  */
2753
2754 static void
2755 assert_has_no_substring_ranges (const location &loc,
2756                                 lexer_test& test,
2757                                 location_t strloc,
2758                                 enum cpp_ttype type,
2759                                 const char *expected_err)
2760 {
2761   cpp_reader *pfile = test.m_parser;
2762   string_concat_db *concats = &test.m_concats;
2763   cpp_substring_ranges ranges;
2764   const char *actual_err
2765     = get_substring_ranges_for_loc (pfile, concats, strloc,
2766                                     type, ranges);
2767   if (should_have_column_data_p (strloc))
2768     ASSERT_STREQ_AT (loc, expected_err, actual_err);
2769   else
2770     ASSERT_STREQ_AT (loc,
2771                      "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2772                      actual_err);
2773 }
2774
/* Convenience wrapper for assert_has_no_substring_ranges, which passes
   SELFTEST_LOCATION so that any failure is reported at the site of the
   macro invocation.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(TEST, STRLOC, TYPE, EXPECTED_ERR) \
    assert_has_no_substring_ranges (SELFTEST_LOCATION, (TEST),          \
                                    (STRLOC), (TYPE), (EXPECTED_ERR))
2778
2779 /* Lex a simple string literal.  Verify the substring location data, before
2780    and after running cpp_interpret_string on it.  */
2781
2782 static void
2783 test_lexer_string_locations_simple (const line_table_case &case_)
2784 {
2785   /* Digits 0-9 (with 0 at column 10), the simple way.
2786      ....................000000000.11111111112.2222222223333333333
2787      ....................123456789.01234567890.1234567890123456789
2788      We add a trailing comment to ensure that we correctly locate
2789      the end of the string literal token.  */
2790   const char *content = "        \"0123456789\" /* not a string */\n";
2791   lexer_test test (case_, content, NULL);
2792
2793   /* Verify that we get the expected token back, with the correct
2794      location information.  */
2795   const cpp_token *tok = test.get_token ();
2796   ASSERT_EQ (tok->type, CPP_STRING);
2797   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2798   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2799
2800   /* At this point in lexing, the quote characters are treated as part of
2801      the string (they are stripped off by cpp_interpret_string).  */
2802
2803   ASSERT_EQ (tok->val.str.len, 12);
2804
2805   /* Verify that cpp_interpret_string works.  */
2806   cpp_string dst_string;
2807   const enum cpp_ttype type = CPP_STRING;
2808   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2809                                       &dst_string, type);
2810   ASSERT_TRUE (result);
2811   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2812   free (const_cast <unsigned char *> (dst_string.text));
2813
2814   /* Verify ranges of individual characters.  This no longer includes the
2815      opening quote, but does include the closing quote.  */
2816   for (int i = 0; i <= 10; i++)
2817     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2818                           10 + i, 10 + i);
2819
2820   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2821 }
2822
2823 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2824    encoding.  */
2825
2826 static void
2827 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2828 {
2829   /* EBCDIC support requires iconv.  */
2830   if (!HAVE_ICONV)
2831     return;
2832
2833   /* Digits 0-9 (with 0 at column 10), the simple way.
2834      ....................000000000.11111111112.2222222223333333333
2835      ....................123456789.01234567890.1234567890123456789
2836      We add a trailing comment to ensure that we correctly locate
2837      the end of the string literal token.  */
2838   const char *content = "        \"0123456789\" /* not a string */\n";
2839   ebcdic_execution_charset use_ebcdic;
2840   lexer_test test (case_, content, &use_ebcdic);
2841
2842   /* Verify that we get the expected token back, with the correct
2843      location information.  */
2844   const cpp_token *tok = test.get_token ();
2845   ASSERT_EQ (tok->type, CPP_STRING);
2846   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2847   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2848
2849   /* At this point in lexing, the quote characters are treated as part of
2850      the string (they are stripped off by cpp_interpret_string).  */
2851
2852   ASSERT_EQ (tok->val.str.len, 12);
2853
2854   /* The remainder of the test requires an iconv implementation that
2855      can convert from UTF-8 to the EBCDIC encoding requested above.  */
2856   if (use_ebcdic.iconv_errors_occurred_p ())
2857     return;
2858
2859   /* Verify that cpp_interpret_string works.  */
2860   cpp_string dst_string;
2861   const enum cpp_ttype type = CPP_STRING;
2862   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2863                                       &dst_string, type);
2864   ASSERT_TRUE (result);
2865   /* We should now have EBCDIC-encoded text, specifically
2866      IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2867      The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
2868   ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2869                 (const char *)dst_string.text);
2870   free (const_cast <unsigned char *> (dst_string.text));
2871
2872   /* Verify that we don't attempt to record substring location information
2873      for such cases.  */
2874   ASSERT_HAS_NO_SUBSTRING_RANGES
2875     (test, tok->src_loc, type,
2876      "execution character set != source character set");
2877 }
2878
2879 /* Lex a string literal containing a hex-escaped character.
2880    Verify the substring location data, before and after running
2881    cpp_interpret_string on it.  */
2882
2883 static void
2884 test_lexer_string_locations_hex (const line_table_case &case_)
2885 {
2886   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2887      and with a space in place of digit 6, to terminate the escaped
2888      hex code.
2889      ....................000000000.111111.11112222.
2890      ....................123456789.012345.67890123.  */
2891   const char *content = "        \"01234\\x35 789\"\n";
2892   lexer_test test (case_, content, NULL);
2893
2894   /* Verify that we get the expected token back, with the correct
2895      location information.  */
2896   const cpp_token *tok = test.get_token ();
2897   ASSERT_EQ (tok->type, CPP_STRING);
2898   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2899   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2900
2901   /* At this point in lexing, the quote characters are treated as part of
2902      the string (they are stripped off by cpp_interpret_string).  */
2903   ASSERT_EQ (tok->val.str.len, 15);
2904
2905   /* Verify that cpp_interpret_string works.  */
2906   cpp_string dst_string;
2907   const enum cpp_ttype type = CPP_STRING;
2908   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2909                                       &dst_string, type);
2910   ASSERT_TRUE (result);
2911   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2912   free (const_cast <unsigned char *> (dst_string.text));
2913
2914   /* Verify ranges of individual characters.  This no longer includes the
2915      opening quote, but does include the closing quote.  */
2916   for (int i = 0; i <= 4; i++)
2917     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2918   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2919   for (int i = 6; i <= 10; i++)
2920     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2921
2922   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2923 }
2924
2925 /* Lex a string literal containing an octal-escaped character.
2926    Verify the substring location data after running cpp_interpret_string
2927    on it.  */
2928
2929 static void
2930 test_lexer_string_locations_oct (const line_table_case &case_)
2931 {
2932   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2933      and with a space in place of digit 6, to terminate the escaped
2934      octal code.
2935      ....................000000000.111111.11112222.2222223333333333444
2936      ....................123456789.012345.67890123.4567890123456789012  */
2937   const char *content = "        \"01234\\065 789\" /* not a string */\n";
2938   lexer_test test (case_, content, NULL);
2939
2940   /* Verify that we get the expected token back, with the correct
2941      location information.  */
2942   const cpp_token *tok = test.get_token ();
2943   ASSERT_EQ (tok->type, CPP_STRING);
2944   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2945
2946   /* Verify that cpp_interpret_string works.  */
2947   cpp_string dst_string;
2948   const enum cpp_ttype type = CPP_STRING;
2949   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2950                                       &dst_string, type);
2951   ASSERT_TRUE (result);
2952   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2953   free (const_cast <unsigned char *> (dst_string.text));
2954
2955   /* Verify ranges of individual characters.  This no longer includes the
2956      opening quote, but does include the closing quote.  */
2957   for (int i = 0; i < 5; i++)
2958     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2959   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2960   for (int i = 6; i <= 10; i++)
2961     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2962
2963   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2964 }
2965
2966 /* Test of string literal containing letter escapes.  */
2967
2968 static void
2969 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2970 {
2971   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2972      .....................000000000.1.11111.1.1.11222.22222223333333
2973      .....................123456789.0.12345.6.7.89012.34567890123456.  */
2974   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2975   lexer_test test (case_, content, NULL);
2976
2977   /* Verify that we get the expected tokens back.  */
2978   const cpp_token *tok = test.get_token ();
2979   ASSERT_EQ (tok->type, CPP_STRING);
2980   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2981
2982   /* Verify ranges of individual characters. */
2983   /* "\t".  */
2984   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2985                         0, 1, 10, 11);
2986   /* "foo". */
2987   for (int i = 1; i <= 3; i++)
2988     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2989                           i, 1, 11 + i, 11 + i);
2990   /* "\\" and "\n".  */
2991   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2992                         4, 1, 15, 16);
2993   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2994                         5, 1, 17, 18);
2995
2996   /* "bar" and closing quote for nul-terminator.  */
2997   for (int i = 6; i <= 9; i++)
2998     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2999                           i, 1, 13 + i, 13 + i);
3000
3001   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
3002 }
3003
3004 /* Another test of a string literal containing a letter escape.
3005    Based on string seen in
3006      printf ("%-%\n");
3007    in gcc.dg/format/c90-printf-1.c.  */
3008
3009 static void
3010 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
3011 {
3012   /* .....................000000000.1111.11.1111.22222222223.
3013      .....................123456789.0123.45.6789.01234567890.  */
3014   const char *content = ("        \"%-%\\n\" /* non-str */\n");
3015   lexer_test test (case_, content, NULL);
3016
3017   /* Verify that we get the expected tokens back.  */
3018   const cpp_token *tok = test.get_token ();
3019   ASSERT_EQ (tok->type, CPP_STRING);
3020   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
3021
3022   /* Verify ranges of individual characters. */
3023   /* "%-%".  */
3024   for (int i = 0; i < 3; i++)
3025     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3026                           i, 1, 10 + i, 10 + i);
3027   /* "\n".  */
3028   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3029                         3, 1, 13, 14);
3030
3031   /* Closing quote for nul-terminator.  */
3032   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3033                         4, 1, 15, 15);
3034
3035   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
3036 }
3037
3038 /* Lex a string literal containing UCN 4 characters.
3039    Verify the substring location data after running cpp_interpret_string
3040    on it.  */
3041
3042 static void
3043 test_lexer_string_locations_ucn4 (const line_table_case &case_)
3044 {
3045   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
3046      as UCN 4.
3047      ....................000000000.111111.111122.222222223.33333333344444
3048      ....................123456789.012345.678901.234567890.12345678901234  */
3049   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
3050   lexer_test test (case_, content, NULL);
3051
3052   /* Verify that we get the expected token back, with the correct
3053      location information.  */
3054   const cpp_token *tok = test.get_token ();
3055   ASSERT_EQ (tok->type, CPP_STRING);
3056   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
3057
3058   /* Verify that cpp_interpret_string works.
3059      The string should be encoded in the execution character
3060      set.  Assuming that is UTF-8, we should have the following:
3061      -----------  ----  -----  -------  ----------------
3062      Byte offset  Byte  Octal  Unicode  Source Column(s)
3063      -----------  ----  -----  -------  ----------------
3064      0            0x30         '0'      10
3065      1            0x31         '1'      11
3066      2            0x32         '2'      12
3067      3            0x33         '3'      13
3068      4            0x34         '4'      14
3069      5            0xE2  \342   U+2174   15-20
3070      6            0x85  \205    (cont)  15-20
3071      7            0xB4  \264    (cont)  15-20
3072      8            0xE2  \342   U+2175   21-26
3073      9            0x85  \205    (cont)  21-26
3074      10           0xB5  \265    (cont)  21-26
3075      11           0x37         '7'      27
3076      12           0x38         '8'      28
3077      13           0x39         '9'      29
3078      14           0x00                  30 (closing quote)
3079      -----------  ----  -----  -------  ---------------.  */
3080
3081   cpp_string dst_string;
3082   const enum cpp_ttype type = CPP_STRING;
3083   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3084                                       &dst_string, type);
3085   ASSERT_TRUE (result);
3086   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3087                 (const char *)dst_string.text);
3088   free (const_cast <unsigned char *> (dst_string.text));
3089
3090   /* Verify ranges of individual characters.  This no longer includes the
3091      opening quote, but does include the closing quote.
3092      '01234'.  */
3093   for (int i = 0; i <= 4; i++)
3094     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3095   /* U+2174.  */
3096   for (int i = 5; i <= 7; i++)
3097     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
3098   /* U+2175.  */
3099   for (int i = 8; i <= 10; i++)
3100     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
3101   /* '789' and nul terminator  */
3102   for (int i = 11; i <= 14; i++)
3103     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
3104
3105   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
3106 }
3107
3108 /* Lex a string literal containing UCN 8 characters.
3109    Verify the substring location data after running cpp_interpret_string
3110    on it.  */
3111
3112 static void
3113 test_lexer_string_locations_ucn8 (const line_table_case &case_)
3114 {
3115   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
3116      ....................000000000.111111.1111222222.2222333333333.344444
3117      ....................123456789.012345.6789012345.6789012345678.901234  */
3118   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
3119   lexer_test test (case_, content, NULL);
3120
3121   /* Verify that we get the expected token back, with the correct
3122      location information.  */
3123   const cpp_token *tok = test.get_token ();
3124   ASSERT_EQ (tok->type, CPP_STRING);
3125   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
3126                            "\"01234\\U00002174\\U00002175789\"");
3127
3128   /* Verify that cpp_interpret_string works.
3129      The UTF-8 encoding of the string is identical to that from
3130      the ucn4 testcase above; the only difference is the column
3131      locations.  */
3132   cpp_string dst_string;
3133   const enum cpp_ttype type = CPP_STRING;
3134   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3135                                       &dst_string, type);
3136   ASSERT_TRUE (result);
3137   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3138                 (const char *)dst_string.text);
3139   free (const_cast <unsigned char *> (dst_string.text));
3140
3141   /* Verify ranges of individual characters.  This no longer includes the
3142      opening quote, but does include the closing quote.
3143      '01234'.  */
3144   for (int i = 0; i <= 4; i++)
3145     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3146   /* U+2174.  */
3147   for (int i = 5; i <= 7; i++)
3148     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
3149   /* U+2175.  */
3150   for (int i = 8; i <= 10; i++)
3151     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
3152   /* '789' at columns 35-37  */
3153   for (int i = 11; i <= 13; i++)
3154     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
3155   /* Closing quote/nul-terminator at column 38.  */
3156   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
3157
3158   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
3159 }
3160
/* Read the 32-bit value stored at PTR_BE_VALUE in big-endian byte order
   (most significant byte first), returning it in host byte order.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes
    = reinterpret_cast <const unsigned char *> (ptr_be_value);

  /* Accumulate the bytes, most significant first.  */
  uint32_t host_value = 0;
  for (int byte_idx = 0; byte_idx < 4; byte_idx++)
    host_value = (host_value << 8) | bytes[byte_idx];
  return host_value;
}
3172
3173 /* Lex a wide string literal and verify that attempts to read substring
3174    location data from it fail gracefully.  */
3175
3176 static void
3177 test_lexer_string_locations_wide_string (const line_table_case &case_)
3178 {
3179   /* Digits 0-9.
3180      ....................000000000.11111111112.22222222233333
3181      ....................123456789.01234567890.12345678901234  */
3182   const char *content = "       L\"0123456789\" /* non-str */\n";
3183   lexer_test test (case_, content, NULL);
3184
3185   /* Verify that we get the expected token back, with the correct
3186      location information.  */
3187   const cpp_token *tok = test.get_token ();
3188   ASSERT_EQ (tok->type, CPP_WSTRING);
3189   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
3190
3191   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
3192   cpp_string dst_string;
3193   const enum cpp_ttype type = CPP_WSTRING;
3194   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3195                                       &dst_string, type);
3196   ASSERT_TRUE (result);
3197   /* The cpp_reader defaults to big-endian with
3198      CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
3199      now be encoded as UTF-32BE.  */
3200   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3201   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3202   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3203   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3204   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3205   free (const_cast <unsigned char *> (dst_string.text));
3206
3207   /* We don't yet support generating substring location information
3208      for L"" strings.  */
3209   ASSERT_HAS_NO_SUBSTRING_RANGES
3210     (test, tok->src_loc, type,
3211      "execution character set != source character set");
3212 }
3213
/* Read the 16-bit value stored at PTR_BE_VALUE in big-endian byte order
   (most significant byte first), returning it in host byte order.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes
    = reinterpret_cast <const unsigned char *> (ptr_be_value);
  const unsigned int high_byte = bytes[0];
  const unsigned int low_byte = bytes[1];
  return static_cast <uint16_t> ((high_byte << 8) | low_byte);
}
3222
3223 /* Lex a u"" string literal and verify that attempts to read substring
3224    location data from it fail gracefully.  */
3225
3226 static void
3227 test_lexer_string_locations_string16 (const line_table_case &case_)
3228 {
3229   /* Digits 0-9.
3230      ....................000000000.11111111112.22222222233333
3231      ....................123456789.01234567890.12345678901234  */
3232   const char *content = "       u\"0123456789\" /* non-str */\n";
3233   lexer_test test (case_, content, NULL);
3234
3235   /* Verify that we get the expected token back, with the correct
3236      location information.  */
3237   const cpp_token *tok = test.get_token ();
3238   ASSERT_EQ (tok->type, CPP_STRING16);
3239   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
3240
3241   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
3242   cpp_string dst_string;
3243   const enum cpp_ttype type = CPP_STRING16;
3244   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3245                                       &dst_string, type);
3246   ASSERT_TRUE (result);
3247
3248   /* The cpp_reader defaults to big-endian, so dst_string should
3249      now be encoded as UTF-16BE.  */
3250   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
3251   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
3252   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
3253   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
3254   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
3255   free (const_cast <unsigned char *> (dst_string.text));
3256
3257   /* We don't yet support generating substring location information
3258      for L"" strings.  */
3259   ASSERT_HAS_NO_SUBSTRING_RANGES
3260     (test, tok->src_loc, type,
3261      "execution character set != source character set");
3262 }
3263
3264 /* Lex a U"" string literal and verify that attempts to read substring
3265    location data from it fail gracefully.  */
3266
3267 static void
3268 test_lexer_string_locations_string32 (const line_table_case &case_)
3269 {
3270   /* Digits 0-9.
3271      ....................000000000.11111111112.22222222233333
3272      ....................123456789.01234567890.12345678901234  */
3273   const char *content = "       U\"0123456789\" /* non-str */\n";
3274   lexer_test test (case_, content, NULL);
3275
3276   /* Verify that we get the expected token back, with the correct
3277      location information.  */
3278   const cpp_token *tok = test.get_token ();
3279   ASSERT_EQ (tok->type, CPP_STRING32);
3280   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
3281
3282   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
3283   cpp_string dst_string;
3284   const enum cpp_ttype type = CPP_STRING32;
3285   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3286                                       &dst_string, type);
3287   ASSERT_TRUE (result);
3288
3289   /* The cpp_reader defaults to big-endian, so dst_string should
3290      now be encoded as UTF-32BE.  */
3291   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3292   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3293   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3294   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3295   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3296   free (const_cast <unsigned char *> (dst_string.text));
3297
3298   /* We don't yet support generating substring location information
3299      for L"" strings.  */
3300   ASSERT_HAS_NO_SUBSTRING_RANGES
3301     (test, tok->src_loc, type,
3302      "execution character set != source character set");
3303 }
3304
3305 /* Lex a u8-string literal.
3306    Verify the substring location data after running cpp_interpret_string
3307    on it.  */
3308
3309 static void
3310 test_lexer_string_locations_u8 (const line_table_case &case_)
3311 {
3312   /* Digits 0-9.
3313      ....................000000000.11111111112.22222222233333
3314      ....................123456789.01234567890.12345678901234  */
3315   const char *content = "      u8\"0123456789\" /* non-str */\n";
3316   lexer_test test (case_, content, NULL);
3317
3318   /* Verify that we get the expected token back, with the correct
3319      location information.  */
3320   const cpp_token *tok = test.get_token ();
3321   ASSERT_EQ (tok->type, CPP_UTF8STRING);
3322   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
3323
3324   /* Verify that cpp_interpret_string works.  */
3325   cpp_string dst_string;
3326   const enum cpp_ttype type = CPP_STRING;
3327   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3328                                       &dst_string, type);
3329   ASSERT_TRUE (result);
3330   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3331   free (const_cast <unsigned char *> (dst_string.text));
3332
3333   /* Verify ranges of individual characters.  This no longer includes the
3334      opening quote, but does include the closing quote.  */
3335   for (int i = 0; i <= 10; i++)
3336     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3337 }
3338
3339 /* Lex a string literal containing UTF-8 source characters.
3340    Verify the substring location data after running cpp_interpret_string
3341    on it.  */
3342
3343 static void
3344 test_lexer_string_locations_utf8_source (const line_table_case &case_)
3345 {
3346  /* This string literal is written out to the source file as UTF-8,
3347     and is of the form "before mojibake after", where "mojibake"
3348     is written as the following four unicode code points:
3349        U+6587 CJK UNIFIED IDEOGRAPH-6587
3350        U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3351        U+5316 CJK UNIFIED IDEOGRAPH-5316
3352        U+3051 HIRAGANA LETTER KE.
3353      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3354      "before" and "after" are 1 byte per unicode character.
3355
3356      The numbering shown are "columns", which are *byte* numbers within
3357      the line, rather than unicode character numbers.
3358
3359      .................... 000000000.1111111.
3360      .................... 123456789.0123456.  */
3361   const char *content = ("        \"before "
3362                          /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3363                               UTF-8: 0xE6 0x96 0x87
3364                               C octal escaped UTF-8: \346\226\207
3365                             "column" numbers: 17-19.  */
3366                          "\346\226\207"
3367
3368                          /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3369                               UTF-8: 0xE5 0xAD 0x97
3370                               C octal escaped UTF-8: \345\255\227
3371                             "column" numbers: 20-22.  */
3372                          "\345\255\227"
3373
3374                          /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3375                               UTF-8: 0xE5 0x8C 0x96
3376                               C octal escaped UTF-8: \345\214\226
3377                             "column" numbers: 23-25.  */
3378                          "\345\214\226"
3379
3380                          /* U+3051 HIRAGANA LETTER KE
3381                               UTF-8: 0xE3 0x81 0x91
3382                               C octal escaped UTF-8: \343\201\221
3383                             "column" numbers: 26-28.  */
3384                          "\343\201\221"
3385
3386                          /* column numbers 29 onwards
3387                           2333333.33334444444444
3388                           9012345.67890123456789. */
3389                          " after\" /* non-str */\n");
3390   lexer_test test (case_, content, NULL);
3391
3392   /* Verify that we get the expected token back, with the correct
3393      location information.  */
3394   const cpp_token *tok = test.get_token ();
3395   ASSERT_EQ (tok->type, CPP_STRING);
3396   ASSERT_TOKEN_AS_TEXT_EQ
3397     (test.m_parser, tok,
3398      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3399
3400   /* Verify that cpp_interpret_string works.  */
3401   cpp_string dst_string;
3402   const enum cpp_ttype type = CPP_STRING;
3403   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3404                                       &dst_string, type);
3405   ASSERT_TRUE (result);
3406   ASSERT_STREQ
3407     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3408      (const char *)dst_string.text);
3409   free (const_cast <unsigned char *> (dst_string.text));
3410
3411   /* Verify ranges of individual characters.  This no longer includes the
3412      opening quote, but does include the closing quote.
3413      Assuming that both source and execution encodings are UTF-8, we have
3414      a run of 25 octets in each, plus the NUL terminator.  */
3415   for (int i = 0; i < 25; i++)
3416     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3417   /* NUL-terminator should use the closing quote at column 35.  */
3418   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3419
3420   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3421 }
3422
3423 /* Test of string literal concatenation.  */
3424
3425 static void
3426 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3427 {
3428   /* Digits 0-9.
3429      .....................000000000.111111.11112222222222
3430      .....................123456789.012345.67890123456789.  */
3431   const char *content = ("        \"01234\" /* non-str */\n"
3432                          "        \"56789\" /* non-str */\n");
3433   lexer_test test (case_, content, NULL);
3434
3435   location_t input_locs[2];
3436
3437   /* Verify that we get the expected tokens back.  */
3438   auto_vec <cpp_string> input_strings;
3439   const cpp_token *tok_a = test.get_token ();
3440   ASSERT_EQ (tok_a->type, CPP_STRING);
3441   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3442   input_strings.safe_push (tok_a->val.str);
3443   input_locs[0] = tok_a->src_loc;
3444
3445   const cpp_token *tok_b = test.get_token ();
3446   ASSERT_EQ (tok_b->type, CPP_STRING);
3447   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3448   input_strings.safe_push (tok_b->val.str);
3449   input_locs[1] = tok_b->src_loc;
3450
3451   /* Verify that cpp_interpret_string works.  */
3452   cpp_string dst_string;
3453   const enum cpp_ttype type = CPP_STRING;
3454   bool result = cpp_interpret_string (test.m_parser,
3455                                       input_strings.address (), 2,
3456                                       &dst_string, type);
3457   ASSERT_TRUE (result);
3458   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3459   free (const_cast <unsigned char *> (dst_string.text));
3460
3461   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
3462   test.m_concats.record_string_concatenation (2, input_locs);
3463
3464   location_t initial_loc = input_locs[0];
3465
3466   /* "01234" on line 1.  */
3467   for (int i = 0; i <= 4; i++)
3468     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3469   /* "56789" in line 2, plus its closing quote for the nul terminator.  */
3470   for (int i = 5; i <= 10; i++)
3471     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3472
3473   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3474 }
3475
3476 /* Another test of string literal concatenation.  */
3477
3478 static void
3479 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3480 {
3481   /* Digits 0-9.
3482      .....................000000000.111.11111112222222
3483      .....................123456789.012.34567890123456.  */
3484   const char *content = ("        \"01\" /* non-str */\n"
3485                          "        \"23\" /* non-str */\n"
3486                          "        \"45\" /* non-str */\n"
3487                          "        \"67\" /* non-str */\n"
3488                          "        \"89\" /* non-str */\n");
3489   lexer_test test (case_, content, NULL);
3490
3491   auto_vec <cpp_string> input_strings;
3492   location_t input_locs[5];
3493
3494   /* Verify that we get the expected tokens back.  */
3495   for (int i = 0; i < 5; i++)
3496     {
3497       const cpp_token *tok = test.get_token ();
3498       ASSERT_EQ (tok->type, CPP_STRING);
3499       input_strings.safe_push (tok->val.str);
3500       input_locs[i] = tok->src_loc;
3501     }
3502
3503   /* Verify that cpp_interpret_string works.  */
3504   cpp_string dst_string;
3505   const enum cpp_ttype type = CPP_STRING;
3506   bool result = cpp_interpret_string (test.m_parser,
3507                                       input_strings.address (), 5,
3508                                       &dst_string, type);
3509   ASSERT_TRUE (result);
3510   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3511   free (const_cast <unsigned char *> (dst_string.text));
3512
3513   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
3514   test.m_concats.record_string_concatenation (5, input_locs);
3515
3516   location_t initial_loc = input_locs[0];
3517
3518   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3519      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3520      and expect get_source_range_for_substring to fail.
3521      However, for a string concatenation test, we can have a case
3522      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3523      but subsequent strings can be after it.
3524      Attempting to detect this within assert_char_at_range
3525      would overcomplicate the logic for the common test cases, so
3526      we detect it here.  */
3527   if (should_have_column_data_p (input_locs[0])
3528       && !should_have_column_data_p (input_locs[4]))
3529     {
3530       /* Verify that get_source_range_for_substring gracefully rejects
3531          this case.  */
3532       source_range actual_range;
3533       const char *err
3534         = get_source_range_for_char (test.m_parser, &test.m_concats,
3535                                      initial_loc, type, 0, &actual_range);
3536       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3537       return;
3538     }
3539
3540   for (int i = 0; i < 5; i++)
3541     for (int j = 0; j < 2; j++)
3542       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3543                             i + 1, 10 + j, 10 + j);
3544
3545   /* NUL-terminator should use the final closing quote at line 5 column 12.  */
3546   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3547
3548   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3549 }
3550
3551 /* Another test of string literal concatenation, this time combined with
3552    various kinds of escaped characters.  */
3553
3554 static void
3555 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3556 {
3557   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3558      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
3559   const char *content
3560     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3561        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3562     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
3563   lexer_test test (case_, content, NULL);
3564
3565   auto_vec <cpp_string> input_strings;
3566   location_t input_locs[4];
3567
3568   /* Verify that we get the expected tokens back.  */
3569   for (int i = 0; i < 4; i++)
3570     {
3571       const cpp_token *tok = test.get_token ();
3572       ASSERT_EQ (tok->type, CPP_STRING);
3573       input_strings.safe_push (tok->val.str);
3574       input_locs[i] = tok->src_loc;
3575     }
3576
3577   /* Verify that cpp_interpret_string works.  */
3578   cpp_string dst_string;
3579   const enum cpp_ttype type = CPP_STRING;
3580   bool result = cpp_interpret_string (test.m_parser,
3581                                       input_strings.address (), 4,
3582                                       &dst_string, type);
3583   ASSERT_TRUE (result);
3584   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3585   free (const_cast <unsigned char *> (dst_string.text));
3586
3587   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
3588   test.m_concats.record_string_concatenation (4, input_locs);
3589
3590   location_t initial_loc = input_locs[0];
3591
3592   for (int i = 0; i <= 4; i++)
3593     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3594   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3595   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3596   for (int i = 7; i <= 9; i++)
3597     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3598
3599   /* NUL-terminator should use the location of the final closing quote.  */
3600   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3601
3602   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3603 }
3604
3605 /* Test of string literal in a macro.  */
3606
3607 static void
3608 test_lexer_string_locations_macro (const line_table_case &case_)
3609 {
3610   /* Digits 0-9.
3611      .....................0000000001111111111.22222222223.
3612      .....................1234567890123456789.01234567890.  */
3613   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
3614                          "  MACRO");
3615   lexer_test test (case_, content, NULL);
3616
3617   /* Verify that we get the expected tokens back.  */
3618   const cpp_token *tok = test.get_token ();
3619   ASSERT_EQ (tok->type, CPP_PADDING);
3620
3621   tok = test.get_token ();
3622   ASSERT_EQ (tok->type, CPP_STRING);
3623   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3624
3625   /* Verify ranges of individual characters.  We ought to
3626      see columns within the macro definition.  */
3627   for (int i = 0; i <= 10; i++)
3628     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3629                           i, 1, 20 + i, 20 + i);
3630
3631   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3632
3633   tok = test.get_token ();
3634   ASSERT_EQ (tok->type, CPP_PADDING);
3635 }
3636
3637 /* Test of stringification of a macro argument.  */
3638
3639 static void
3640 test_lexer_string_locations_stringified_macro_argument
3641   (const line_table_case &case_)
3642 {
3643   /* .....................000000000111111111122222222223.
3644      .....................123456789012345678901234567890.  */
3645   const char *content = ("#define MACRO(X) #X /* non-str */\n"
3646                          "MACRO(foo)\n");
3647   lexer_test test (case_, content, NULL);
3648
3649   /* Verify that we get the expected token back.  */
3650   const cpp_token *tok = test.get_token ();
3651   ASSERT_EQ (tok->type, CPP_PADDING);
3652
3653   tok = test.get_token ();
3654   ASSERT_EQ (tok->type, CPP_STRING);
3655   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3656
3657   /* We don't support getting the location of a stringified macro
3658      argument.  Verify that it fails gracefully.  */
3659   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3660                                   "cpp_interpret_string_1 failed");
3661
3662   tok = test.get_token ();
3663   ASSERT_EQ (tok->type, CPP_PADDING);
3664
3665   tok = test.get_token ();
3666   ASSERT_EQ (tok->type, CPP_PADDING);
3667 }
3668
3669 /* Ensure that we are fail gracefully if something attempts to pass
3670    in a location that isn't a string literal token.  Seen on this code:
3671
3672      const char a[] = " %d ";
3673      __builtin_printf (a, 0.5);
3674                        ^
3675
3676    when c-format.cc erroneously used the indicated one-character
3677    location as the format string location, leading to a read past the
3678    end of a string buffer in cpp_interpret_string_1.  */
3679
3680 static void
3681 test_lexer_string_locations_non_string (const line_table_case &case_)
3682 {
3683   /* .....................000000000111111111122222222223.
3684      .....................123456789012345678901234567890.  */
3685   const char *content = ("         a\n");
3686   lexer_test test (case_, content, NULL);
3687
3688   /* Verify that we get the expected token back.  */
3689   const cpp_token *tok = test.get_token ();
3690   ASSERT_EQ (tok->type, CPP_NAME);
3691   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3692
3693   /* At this point, libcpp is attempting to interpret the name as a
3694      string literal, despite it not starting with a quote.  We don't detect
3695      that, but we should at least fail gracefully.  */
3696   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3697                                   "cpp_interpret_string_1 failed");
3698 }
3699
3700 /* Ensure that we can read substring information for a token which
3701    starts in one linemap and ends in another .  Adapted from
3702    gcc.dg/cpp/pr69985.c.  */
3703
3704 static void
3705 test_lexer_string_locations_long_line (const line_table_case &case_)
3706 {
3707   /* .....................000000.000111111111
3708      .....................123456.789012346789.  */
3709   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
3710                          "     \"0123456789012345678901234567890123456789"
3711                          "0123456789012345678901234567890123456789"
3712                          "0123456789012345678901234567890123456789"
3713                          "0123456789\"\n");
3714
3715   lexer_test test (case_, content, NULL);
3716
3717   /* Verify that we get the expected token back.  */
3718   const cpp_token *tok = test.get_token ();
3719   ASSERT_EQ (tok->type, CPP_STRING);
3720
3721   if (!should_have_column_data_p (line_table->highest_location))
3722     return;
3723
3724   /* Verify ranges of individual characters.  */
3725   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3726   for (int i = 0; i < 131; i++)
3727     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3728                           i, 2, 7 + i, 7 + i);
3729 }
3730
3731 /* Test of locations within a raw string that doesn't contain a newline.  */
3732
3733 static void
3734 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3735 {
3736   /* .....................00.0000000111111111122.
3737      .....................12.3456789012345678901.  */
3738   const char *content = ("R\"foo(0123456789)foo\"\n");
3739   lexer_test test (case_, content, NULL);
3740
3741   /* Verify that we get the expected token back.  */
3742   const cpp_token *tok = test.get_token ();
3743   ASSERT_EQ (tok->type, CPP_STRING);
3744
3745   /* Verify that cpp_interpret_string works.  */
3746   cpp_string dst_string;
3747   const enum cpp_ttype type = CPP_STRING;
3748   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3749                                       &dst_string, type);
3750   ASSERT_TRUE (result);
3751   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3752   free (const_cast <unsigned char *> (dst_string.text));
3753
3754   if (!should_have_column_data_p (line_table->highest_location))
3755     return;
3756
3757   /* 0-9, plus the nil terminator.  */
3758   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3759   for (int i = 0; i < 11; i++)
3760     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3761                           i, 1, 7 + i, 7 + i);
3762 }
3763
3764 /* Test of locations within a raw string that contains a newline.  */
3765
3766 static void
3767 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3768 {
3769   /* .....................00.0000.
3770      .....................12.3456.  */
3771   const char *content = ("R\"foo(\n"
3772   /* .....................00000.
3773      .....................12345.  */
3774                          "hello\n"
3775                          "world\n"
3776   /* .....................00000.
3777      .....................12345.  */
3778                          ")foo\"\n");
3779   lexer_test test (case_, content, NULL);
3780
3781   /* Verify that we get the expected token back.  */
3782   const cpp_token *tok = test.get_token ();
3783   ASSERT_EQ (tok->type, CPP_STRING);
3784
3785   /* Verify that cpp_interpret_string works.  */
3786   cpp_string dst_string;
3787   const enum cpp_ttype type = CPP_STRING;
3788   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3789                                       &dst_string, type);
3790   ASSERT_TRUE (result);
3791   ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3792   free (const_cast <unsigned char *> (dst_string.text));
3793
3794   if (!should_have_column_data_p (line_table->highest_location))
3795     return;
3796
3797   /* Currently we don't support locations within raw strings that
3798      contain newlines.  */
3799   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3800                                   "range endpoints are on different lines");
3801 }
3802
3803 /* Test of parsing an unterminated raw string.  */
3804
3805 static void
3806 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3807 {
3808   const char *content = "R\"ouch()ouCh\" /* etc */";
3809
3810   lexer_diagnostic_sink diagnostics;
3811   lexer_test test (case_, content, &diagnostics);
3812   test.m_implicitly_expect_EOF = false;
3813
3814   /* Attempt to parse the raw string.  */
3815   const cpp_token *tok = test.get_token ();
3816   ASSERT_EQ (tok->type, CPP_EOF);
3817
3818   ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3819   /* We expect the message "unterminated raw string"
3820      in the "cpplib" translation domain.
3821      It's not clear that dgettext is available on all supported hosts,
3822      so this assertion is commented-out for now.
3823        ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3824                      diagnostics.m_diagnostics[0]);
3825   */
3826 }
3827
3828 /* Test of lexing char constants.  */
3829
3830 static void
3831 test_lexer_char_constants (const line_table_case &case_)
3832 {
3833   /* Various char constants.
3834      .....................0000000001111111111.22222222223.
3835      .....................1234567890123456789.01234567890.  */
3836   const char *content = ("         'a'\n"
3837                          "        u'a'\n"
3838                          "        U'a'\n"
3839                          "        L'a'\n"
3840                          "         'abc'\n");
3841   lexer_test test (case_, content, NULL);
3842
3843   /* Verify that we get the expected tokens back.  */
3844   /* 'a'.  */
3845   const cpp_token *tok = test.get_token ();
3846   ASSERT_EQ (tok->type, CPP_CHAR);
3847   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3848
3849   unsigned int chars_seen;
3850   int unsignedp;
3851   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3852                                           &chars_seen, &unsignedp);
3853   ASSERT_EQ (cc, 'a');
3854   ASSERT_EQ (chars_seen, 1);
3855
3856   /* u'a'.  */
3857   tok = test.get_token ();
3858   ASSERT_EQ (tok->type, CPP_CHAR16);
3859   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3860
3861   /* U'a'.  */
3862   tok = test.get_token ();
3863   ASSERT_EQ (tok->type, CPP_CHAR32);
3864   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3865
3866   /* L'a'.  */
3867   tok = test.get_token ();
3868   ASSERT_EQ (tok->type, CPP_WCHAR);
3869   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3870
3871   /* 'abc' (c-char-sequence).  */
3872   tok = test.get_token ();
3873   ASSERT_EQ (tok->type, CPP_CHAR);
3874   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3875 }
3876 /* A table of interesting location_t values, giving one axis of our test
3877    matrix.  */
3878
3879 static const location_t boundary_locations[] = {
3880   /* Zero means "don't override the default values for a new line_table".  */
3881   0,
3882
3883   /* An arbitrary non-zero value that isn't close to one of
3884      the boundary values below.  */
3885   0x10000,
3886
3887   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
3888   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3889   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3890   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3891   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3892   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3893
3894   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
3895   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3896   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3897   LINE_MAP_MAX_LOCATION_WITH_COLS,
3898   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3899   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3900 };
3901
3902 /* Run TESTCASE multiple times, once for each case in our test matrix.  */
3903
3904 void
3905 for_each_line_table_case (void (*testcase) (const line_table_case &))
3906 {
3907   /* As noted above in the description of struct line_table_case,
3908      we want to explore a test matrix of interesting line_table
3909      situations, running various selftests for each case within the
3910      matrix.  */
3911
3912   /* Run all tests with:
3913      (a) line_table->default_range_bits == 0, and
3914      (b) line_table->default_range_bits == 5.  */
3915   int num_cases_tested = 0;
3916   for (int default_range_bits = 0; default_range_bits <= 5;
3917        default_range_bits += 5)
3918     {
3919       /* ...and use each of the "interesting" location values as
3920          the starting location within line_table.  */
3921       const int num_boundary_locations = ARRAY_SIZE (boundary_locations);
3922       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3923         {
3924           line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3925
3926           testcase (c);
3927
3928           num_cases_tested++;
3929         }
3930     }
3931
3932   /* Verify that we fully covered the test matrix.  */
3933   ASSERT_EQ (num_cases_tested, 2 * 12);
3934 }
3935
3936 /* Verify that when presented with a consecutive pair of locations with
3937    a very large line offset, we don't attempt to consolidate them into
3938    a single ordinary linemap where the line offsets within the line map
3939    would lead to overflow (PR lto/88147).  */
3940
3941 static void
3942 test_line_offset_overflow ()
3943 {
3944   line_table_test ltt (line_table_case (5, 0));
3945
3946   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3947   linemap_line_start (line_table, 1, 100);
3948   location_t loc_a = linemap_line_start (line_table, 2578, 255);
3949   assert_loceq ("foo.c", 2578, 0, loc_a);
3950
3951   const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3952   ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3953   ASSERT_EQ (ordmap_a->m_range_bits, 5);
3954
3955   location_t loc_b = linemap_line_start (line_table, 404198, 512);
3956   assert_loceq ("foo.c", 404198, 0, loc_b);
3957
3958   /* We should have started a new linemap, rather than attempting to store
3959      a very large line offset.  */
3960   const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3961   ASSERT_NE (ordmap_a, ordmap_b);
3962 }
3963
3964 void test_cpp_utf8 ()
3965 {
3966   const int def_tabstop = 8;
3967   cpp_char_column_policy policy (def_tabstop, cpp_wcwidth);
3968
3969   /* Verify that wcwidth of invalid UTF-8 or control bytes is 1.  */
3970   {
3971     int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, policy);
3972     ASSERT_EQ (8, w_bad);
3973     int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, policy);
3974     ASSERT_EQ (5, w_ctrl);
3975   }
3976
3977   /* Verify that wcwidth of valid UTF-8 is as expected.  */
3978   {
3979     const int w_pi = cpp_display_width ("\xcf\x80", 2, policy);
3980     ASSERT_EQ (1, w_pi);
3981     const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, policy);
3982     ASSERT_EQ (2, w_emoji);
3983     const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
3984                                                         policy);
3985     ASSERT_EQ (1, w_umlaut_precomposed);
3986     const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
3987                                                       policy);
3988     ASSERT_EQ (1, w_umlaut_combining);
3989     const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, policy);
3990     ASSERT_EQ (2, w_han);
3991     const int w_ascii = cpp_display_width ("GCC", 3, policy);
3992     ASSERT_EQ (3, w_ascii);
3993     const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
3994                                            "\x9f! \xe4\xb8\xba y\xcc\x88",
3995                                            24, policy);
3996     ASSERT_EQ (18, w_mixed);
3997   }
3998
3999   /* Verify that display width properly expands tabs.  */
4000   {
4001     const char *tstr = "\tabc\td";
4002     ASSERT_EQ (6, cpp_display_width (tstr, 6,
4003                                      cpp_char_column_policy (1, cpp_wcwidth)));
4004     ASSERT_EQ (10, cpp_display_width (tstr, 6,
4005                                       cpp_char_column_policy (3, cpp_wcwidth)));
4006     ASSERT_EQ (17, cpp_display_width (tstr, 6,
4007                                       cpp_char_column_policy (8, cpp_wcwidth)));
4008     ASSERT_EQ (1,
4009                cpp_display_column_to_byte_column
4010                  (tstr, 6, 7, cpp_char_column_policy (8, cpp_wcwidth)));
4011   }
4012
4013   /* Verify that cpp_byte_column_to_display_column can go past the end,
4014      and similar edge cases.  */
4015   {
4016     const char *str
4017       /* Display columns.
4018          111111112345  */
4019       = "\xcf\x80 abc";
4020       /* 111122223456
4021          Byte columns.  */
4022
4023     ASSERT_EQ (5, cpp_display_width (str, 6, policy));
4024     ASSERT_EQ (105,
4025                cpp_byte_column_to_display_column (str, 6, 106, policy));
4026     ASSERT_EQ (10000,
4027                cpp_byte_column_to_display_column (NULL, 0, 10000, policy));
4028     ASSERT_EQ (0,
4029                cpp_byte_column_to_display_column (NULL, 10000, 0, policy));
4030   }
4031
4032   /* Verify that cpp_display_column_to_byte_column can go past the end,
4033      and similar edge cases, and check invertibility.  */
4034   {
4035     const char *str
4036       /* Display columns.
4037          000000000000000000000000000000000000011
4038          111111112222222234444444455555555678901  */
4039       = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
4040       /* 000000000000000000000000000000000111111
4041          111122223333444456666777788889999012345
4042          Byte columns.  */
4043     ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, policy));
4044     ASSERT_EQ (15,
4045                cpp_display_column_to_byte_column (str, 15, 11, policy));
4046     ASSERT_EQ (115,
4047                cpp_display_column_to_byte_column (str, 15, 111, policy));
4048     ASSERT_EQ (10000,
4049                cpp_display_column_to_byte_column (NULL, 0, 10000, policy));
4050     ASSERT_EQ (0,
4051                cpp_display_column_to_byte_column (NULL, 10000, 0, policy));
4052
4053     /* Verify that we do not interrupt a UTF-8 sequence.  */
4054     ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, policy));
4055
4056     for (int byte_col = 1; byte_col <= 15; ++byte_col)
4057       {
4058         const int disp_col
4059           = cpp_byte_column_to_display_column (str, 15, byte_col, policy);
4060         const int byte_col2
4061           = cpp_display_column_to_byte_column (str, 15, disp_col, policy);
4062
4063         /* If we ask for the display column in the middle of a UTF-8
4064            sequence, it will return the length of the partial sequence,
4065            matching the behavior of GCC before display column support.
4066            Otherwise check the round trip was successful.  */
4067         if (byte_col < 4)
4068           ASSERT_EQ (byte_col, disp_col);
4069         else if (byte_col >= 6 && byte_col < 9)
4070           ASSERT_EQ (3 + (byte_col - 5), disp_col);
4071         else
4072           ASSERT_EQ (byte_col2, byte_col);
4073       }
4074   }
4075 }
4076
4077 static bool
4078 check_cpp_valid_utf8_p (const char *str)
4079 {
4080   return cpp_valid_utf8_p (str, strlen (str));
4081 }
4082
4083 /* Check that cpp_valid_utf8_p works as expected.  */
4084
4085 static void
4086 test_cpp_valid_utf8_p ()
4087 {
4088   ASSERT_TRUE (check_cpp_valid_utf8_p ("hello world"));
4089
4090   /* 2-byte char (pi).  */
4091   ASSERT_TRUE (check_cpp_valid_utf8_p("\xcf\x80"));
4092
4093   /* 3-byte chars (the Japanese word "mojibake").  */
4094   ASSERT_TRUE (check_cpp_valid_utf8_p
4095                (
4096                 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
4097                    UTF-8: 0xE6 0x96 0x87
4098                    C octal escaped UTF-8: \346\226\207.  */
4099                 "\346\226\207"
4100                 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
4101                    UTF-8: 0xE5 0xAD 0x97
4102                    C octal escaped UTF-8: \345\255\227.  */
4103                 "\345\255\227"
4104                 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
4105                    UTF-8: 0xE5 0x8C 0x96
4106                    C octal escaped UTF-8: \345\214\226.  */
4107                 "\345\214\226"
4108                 /* U+3051 HIRAGANA LETTER KE
4109                    UTF-8: 0xE3 0x81 0x91
4110                    C octal escaped UTF-8: \343\201\221.  */
4111                 "\343\201\221"));
4112
4113   /* 4-byte char: an emoji.  */
4114   ASSERT_TRUE (check_cpp_valid_utf8_p ("\xf0\x9f\x98\x82"));
4115
4116   /* Control codes, including the NUL byte.  */
4117   ASSERT_TRUE (cpp_valid_utf8_p ("\r\n\v\0\1", 5));
4118
4119   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xf0!\x9f!\x98!\x82!"));
4120
4121   /* Unexpected continuation bytes.  */
4122   for (unsigned char continuation_byte = 0x80;
4123        continuation_byte <= 0xbf;
4124        continuation_byte++)
4125     ASSERT_FALSE (cpp_valid_utf8_p ((const char *)&continuation_byte, 1));
4126
4127   /* "Lonely start characters" for 2-byte sequences.  */
4128   {
4129     unsigned char buf[2];
4130     buf[1] = ' ';
4131     for (buf[0] = 0xc0;
4132          buf[0] <= 0xdf;
4133          buf[0]++)
4134       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4135   }
4136
4137   /* "Lonely start characters" for 3-byte sequences.  */
4138   {
4139     unsigned char buf[2];
4140     buf[1] = ' ';
4141     for (buf[0] = 0xe0;
4142          buf[0] <= 0xef;
4143          buf[0]++)
4144       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4145   }
4146
4147   /* "Lonely start characters" for 4-byte sequences.  */
4148   {
4149     unsigned char buf[2];
4150     buf[1] = ' ';
4151     for (buf[0] = 0xf0;
4152          buf[0] <= 0xf4;
4153          buf[0]++)
4154       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4155   }
4156
4157   /* Invalid start characters (formerly valid for 5-byte and 6-byte
4158      sequences).  */
4159   {
4160     unsigned char buf[2];
4161     buf[1] = ' ';
4162     for (buf[0] = 0xf5;
4163          buf[0] <= 0xfd;
4164          buf[0]++)
4165       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4166   }
4167
4168   /* Impossible bytes.  */
4169   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc0"));
4170   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc1"));
4171   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xfe"));
4172   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xff"));
4173 }
4174
4175 /* Run all of the selftests within this file.  */
4176
4177 void
4178 input_cc_tests ()
4179 {
4180   test_linenum_comparisons ();
4181   test_should_have_column_data_p ();
4182   test_unknown_location ();
4183   test_builtins ();
4184   for_each_line_table_case (test_make_location_nonpure_range_endpoints);
4185
4186   for_each_line_table_case (test_accessing_ordinary_linemaps);
4187   for_each_line_table_case (test_lexer);
4188   for_each_line_table_case (test_lexer_string_locations_simple);
4189   for_each_line_table_case (test_lexer_string_locations_ebcdic);
4190   for_each_line_table_case (test_lexer_string_locations_hex);
4191   for_each_line_table_case (test_lexer_string_locations_oct);
4192   for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
4193   for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
4194   for_each_line_table_case (test_lexer_string_locations_ucn4);
4195   for_each_line_table_case (test_lexer_string_locations_ucn8);
4196   for_each_line_table_case (test_lexer_string_locations_wide_string);
4197   for_each_line_table_case (test_lexer_string_locations_string16);
4198   for_each_line_table_case (test_lexer_string_locations_string32);
4199   for_each_line_table_case (test_lexer_string_locations_u8);
4200   for_each_line_table_case (test_lexer_string_locations_utf8_source);
4201   for_each_line_table_case (test_lexer_string_locations_concatenation_1);
4202   for_each_line_table_case (test_lexer_string_locations_concatenation_2);
4203   for_each_line_table_case (test_lexer_string_locations_concatenation_3);
4204   for_each_line_table_case (test_lexer_string_locations_macro);
4205   for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
4206   for_each_line_table_case (test_lexer_string_locations_non_string);
4207   for_each_line_table_case (test_lexer_string_locations_long_line);
4208   for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
4209   for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
4210   for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
4211   for_each_line_table_case (test_lexer_char_constants);
4212
4213   test_reading_source_line ();
4214
4215   test_line_offset_overflow ();
4216
4217   test_cpp_utf8 ();
4218   test_cpp_valid_utf8_p ();
4219 }
4220
4221 } // namespace selftest
4222
4223 #endif /* CHECKING_P */