gcc/input.cc

   1 /* Data and functions related to line maps and input files.
   2    Copyright (C) 2004-2023 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "intl.h"
  24 #include "diagnostic.h"
  25 #include "selftest.h"
  26 #include "cpplib.h"
  27
  28 #ifndef HAVE_ICONV
  29 #define HAVE_ICONV 0
  30 #endif
  31
  32 const char *
  33 special_fname_builtin ()
  34 {
  35   return _("<built-in>");
  36 }
  37
  38 /* Input charset configuration.  */
  39 static const char *default_charset_callback (const char *)
  40 {
  41   return nullptr;
  42 }
  43
  44 void
  45 file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
  46                                       bool should_skip_bom)
  47 {
  48   in_context.ccb = (ccb ? ccb : default_charset_callback);
  49   in_context.should_skip_bom = should_skip_bom;
  50 }
  51
  52 /* This is a cache used by get_next_line to store the content of a
  53    file to be searched for file lines.  */
  54 class file_cache_slot
  55 {
  56 public:
  57   file_cache_slot ();
  58   ~file_cache_slot ();
  59
  60   bool read_line_num (size_t line_num,
  61                       char ** line, ssize_t *line_len);
  62
  63   /* Accessors.  */
  64   const char *get_file_path () const { return m_file_path; }
  65   unsigned get_use_count () const { return m_use_count; }
  66   bool missing_trailing_newline_p () const
  67   {
  68     return m_missing_trailing_newline;
  69   }
  70   char_span get_full_file_content ();
  71
  72   void inc_use_count () { m_use_count++; }
  73
  74   bool create (const file_cache::input_context &in_context,
  75                const char *file_path, FILE *fp, unsigned highest_use_count);
  76   void evict ();
  77
  78  private:
  79   /* These are information used to store a line boundary.  */
  80   class line_info
  81   {
  82   public:
  83     /* The line number.  It starts from 1.  */
  84     size_t line_num;
  85
  86     /* The position (byte count) of the beginning of the line,
  87        relative to the file data pointer.  This starts at zero.  */
  88     size_t start_pos;
  89
  90     /* The position (byte count) of the last byte of the line.  This
  91        normally points to the '\n' character, or to one byte after the
  92        last byte of the file, if the file doesn't contain a '\n'
  93        character.  */
  94     size_t end_pos;
  95
  96     line_info (size_t l, size_t s, size_t e)
  97       : line_num (l), start_pos (s), end_pos (e)
  98     {}
  99
 100     line_info ()
 101       :line_num (0), start_pos (0), end_pos (0)
 102     {}
 103   };
 104
 105   bool needs_read_p () const;
 106   bool needs_grow_p () const;
 107   void maybe_grow ();
 108   bool read_data ();
 109   bool maybe_read_data ();
 110   bool get_next_line (char **line, ssize_t *line_len);
 111   bool read_next_line (char ** line, ssize_t *line_len);
 112   bool goto_next_line ();
 113
 114   static const size_t buffer_size = 4 * 1024;
 115   static const size_t line_record_size = 100;
 116
 117   /* The number of time this file has been accessed.  This is used
 118      to designate which file cache to evict from the cache
 119      array.  */
 120   unsigned m_use_count;
 121
 122   /* The file_path is the key for identifying a particular file in
 123      the cache.
 124      For libcpp-using code, the underlying buffer for this field is
 125      owned by the corresponding _cpp_file within the cpp_reader.  */
 126   const char *m_file_path;
 127
 128   FILE *m_fp;
 129
 130   /* This points to the content of the file that we've read so
 131      far.  */
 132   char *m_data;
 133
 134   /* The allocated buffer to be freed may start a little earlier than DATA,
 135      e.g. if a UTF8 BOM was skipped at the beginning.  */
 136   int m_alloc_offset;
 137
 138   /*  The size of the DATA array above.*/
 139   size_t m_size;
 140
 141   /* The number of bytes read from the underlying file so far.  This
 142      must be less (or equal) than SIZE above.  */
 143   size_t m_nb_read;
 144
 145   /* The index of the beginning of the current line.  */
 146   size_t m_line_start_idx;
 147
 148   /* The number of the previous line read.  This starts at 1.  Zero
 149      means we've read no line so far.  */
 150   size_t m_line_num;
 151
 152   /* This is the total number of lines of the current file.  At the
 153      moment, we try to get this information from the line map
 154      subsystem.  Note that this is just a hint.  When using the C++
 155      front-end, this hint is correct because the input file is then
 156      completely tokenized before parsing starts; so the line map knows
 157      the number of lines before compilation really starts.  For e.g,
 158      the C front-end, it can happen that we start emitting diagnostics
 159      before the line map has seen the end of the file.  */
 160   size_t m_total_lines;
 161
 162   /* Could this file be missing a trailing newline on its final line?
 163      Initially true (to cope with empty files), set to true/false
 164      as each line is read.  */
 165   bool m_missing_trailing_newline;
 166
 167   /* This is a record of the beginning and end of the lines we've seen
 168      while reading the file.  This is useful to avoid walking the data
 169      from the beginning when we are asked to read a line that is
 170      before LINE_START_IDX above.  Note that the maximum size of this
 171      record is line_record_size, so that the memory consumption
 172      doesn't explode.  We thus scale total_lines down to
 173      line_record_size.  */
 174   vec<line_info, va_heap> m_line_record;
 175
 176   void offset_buffer (int offset)
 177   {
 178     gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
 179                 : (size_t) offset <= m_size);
 180     gcc_assert (m_data);
 181     m_alloc_offset += offset;
 182     m_data += offset;
 183     m_size -= offset;
 184   }
 185
 186 };
 187
/* Current position in real source file.  */

location_t input_location = UNKNOWN_LOCATION;

/* The line-map set that the routines in this file hand to the
   linemap_* functions when resolving and expanding locations.  */

class line_maps *line_table;

/* A stashed copy of "line_table" for use by selftest::line_table_test.
   This needs to be a global so that it can be a GC root, and thus
   prevent the stashed copy from being garbage-collected if the GC runs
   during a line_table_test.  */

class line_maps *saved_line_table;
 200
 201 /* Expand the source location LOC into a human readable location.  If
 202    LOC resolves to a builtin location, the file name of the readable
 203    location is set to the string "<built-in>". If EXPANSION_POINT_P is
 204    TRUE and LOC is virtual, then it is resolved to the expansion
 205    point of the involved macro.  Otherwise, it is resolved to the
 206    spelling location of the token.
 207
 208    When resolving to the spelling location of the token, if the
 209    resulting location is for a built-in location (that is, it has no
 210    associated line/column) in the context of a macro expansion, the
 211    returned location is the first one (while unwinding the macro
 212    location towards its expansion point) that is in real source
 213    code.
 214
 215    ASPECT controls which part of the location to use.  */
 216
 217 static expanded_location
 218 expand_location_1 (location_t loc,
 219                    bool expansion_point_p,
 220                    enum location_aspect aspect)
 221 {
 222   expanded_location xloc;
 223   const line_map_ordinary *map;
 224   enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
 225   tree block = NULL;
 226
 227   if (IS_ADHOC_LOC (loc))
 228     {
 229       block = LOCATION_BLOCK (loc);
 230       loc = LOCATION_LOCUS (loc);
 231     }
 232
 233   memset (&xloc, 0, sizeof (xloc));
 234
 235   if (loc >= RESERVED_LOCATION_COUNT)
 236     {
 237       if (!expansion_point_p)
 238         {
 239           /* We want to resolve LOC to its spelling location.
 240
 241              But if that spelling location is a reserved location that
 242              appears in the context of a macro expansion (like for a
 243              location for a built-in token), let's consider the first
 244              location (toward the expansion point) that is not reserved;
 245              that is, the first location that is in real source code.  */
 246           loc = linemap_unwind_to_first_non_reserved_loc (line_table,
 247                                                           loc, NULL);
 248           lrk = LRK_SPELLING_LOCATION;
 249         }
 250       loc = linemap_resolve_location (line_table, loc, lrk, &map);
 251
 252       /* loc is now either in an ordinary map, or is a reserved location.
 253          If it is a compound location, the caret is in a spelling location,
 254          but the start/finish might still be a virtual location.
 255          Depending of what the caller asked for, we may need to recurse
 256          one level in order to resolve any virtual locations in the
 257          end-points.  */
 258       switch (aspect)
 259         {
 260         default:
 261           gcc_unreachable ();
 262           /* Fall through.  */
 263         case LOCATION_ASPECT_CARET:
 264           break;
 265         case LOCATION_ASPECT_START:
 266           {
 267             location_t start = get_start (loc);
 268             if (start != loc)
 269               return expand_location_1 (start, expansion_point_p, aspect);
 270           }
 271           break;
 272         case LOCATION_ASPECT_FINISH:
 273           {
 274             location_t finish = get_finish (loc);
 275             if (finish != loc)
 276               return expand_location_1 (finish, expansion_point_p, aspect);
 277           }
 278           break;
 279         }
 280       xloc = linemap_expand_location (line_table, map, loc);
 281     }
 282
 283   xloc.data = block;
 284   if (loc <= BUILTINS_LOCATION)
 285     xloc.file = loc == UNKNOWN_LOCATION ? NULL : special_fname_builtin ();
 286
 287   return xloc;
 288 }
 289
 290 /* Initialize the set of cache used for files accessed by caret
 291    diagnostic.  */
 292
 293 static void
 294 diagnostic_file_cache_init (void)
 295 {
 296   gcc_assert (global_dc);
 297   if (global_dc->m_file_cache == NULL)
 298     global_dc->m_file_cache = new file_cache ();
 299 }
 300
 301 /* Free the resources used by the set of cache used for files accessed
 302    by caret diagnostic.  */
 303
 304 void
 305 diagnostic_file_cache_fini (void)
 306 {
 307   if (global_dc->m_file_cache)
 308     {
 309       delete global_dc->m_file_cache;
 310       global_dc->m_file_cache = NULL;
 311     }
 312 }
 313
 314 /* Return the total lines number that have been read so far by the
 315    line map (in the preprocessor) so far.  For languages like C++ that
 316    entirely preprocess the input file before starting to parse, this
 317    equals the actual number of lines of the file.  */
 318
 319 static size_t
 320 total_lines_num (const char *file_path)
 321 {
 322   size_t r = 0;
 323   location_t l = 0;
 324   if (linemap_get_file_highest_location (line_table, file_path, &l))
 325     {
 326       gcc_assert (l >= RESERVED_LOCATION_COUNT);
 327       expanded_location xloc = expand_location (l);
 328       r = xloc.line;
 329     }
 330   return r;
 331 }
 332
 333 /* Lookup the cache used for the content of a given file accessed by
 334    caret diagnostic.  Return the found cached file, or NULL if no
 335    cached file was found.  */
 336
 337 file_cache_slot *
 338 file_cache::lookup_file (const char *file_path)
 339 {
 340   gcc_assert (file_path);
 341
 342   /* This will contain the found cached file.  */
 343   file_cache_slot *r = NULL;
 344   for (unsigned i = 0; i < num_file_slots; ++i)
 345     {
 346       file_cache_slot *c = &m_file_slots[i];
 347       if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
 348         {
 349           c->inc_use_count ();
 350           r = c;
 351         }
 352     }
 353
 354   if (r)
 355     r->inc_use_count ();
 356
 357   return r;
 358 }
 359
 360 /* Purge any mention of FILENAME from the cache of files used for
 361    printing source code.  For use in selftests when working
 362    with tempfiles.  */
 363
 364 void
 365 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
 366 {
 367   gcc_assert (file_path);
 368
 369   if (!global_dc->m_file_cache)
 370     return;
 371
 372   global_dc->m_file_cache->forcibly_evict_file (file_path);
 373 }
 374
 375 void
 376 file_cache::forcibly_evict_file (const char *file_path)
 377 {
 378   gcc_assert (file_path);
 379
 380   file_cache_slot *r = lookup_file (file_path);
 381   if (!r)
 382     /* Not found.  */
 383     return;
 384
 385   r->evict ();
 386 }
 387
 388 void
 389 file_cache_slot::evict ()
 390 {
 391   m_file_path = NULL;
 392   if (m_fp)
 393     fclose (m_fp);
 394   m_fp = NULL;
 395   m_nb_read = 0;
 396   m_line_start_idx = 0;
 397   m_line_num = 0;
 398   m_line_record.truncate (0);
 399   m_use_count = 0;
 400   m_total_lines = 0;
 401   m_missing_trailing_newline = true;
 402 }
 403
 404 /* Return the file cache that has been less used, recently, or the
 405    first empty one.  If HIGHEST_USE_COUNT is non-null,
 406    *HIGHEST_USE_COUNT is set to the highest use count of the entries
 407    in the cache table.  */
 408
 409 file_cache_slot*
 410 file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
 411 {
 412   diagnostic_file_cache_init ();
 413
 414   file_cache_slot *to_evict = &m_file_slots[0];
 415   unsigned huc = to_evict->get_use_count ();
 416   for (unsigned i = 1; i < num_file_slots; ++i)
 417     {
 418       file_cache_slot *c = &m_file_slots[i];
 419       bool c_is_empty = (c->get_file_path () == NULL);
 420
 421       if (c->get_use_count () < to_evict->get_use_count ()
 422           || (to_evict->get_file_path () && c_is_empty))
 423         /* We evict C because it's either an entry with a lower use
 424            count or one that is empty.  */
 425         to_evict = c;
 426
 427       if (huc < c->get_use_count ())
 428         huc = c->get_use_count ();
 429
 430       if (c_is_empty)
 431         /* We've reached the end of the cache; subsequent elements are
 432            all empty.  */
 433         break;
 434     }
 435
 436   if (highest_use_count)
 437     *highest_use_count = huc;
 438
 439   return to_evict;
 440 }
 441
 442 /* Create the cache used for the content of a given file to be
 443    accessed by caret diagnostic.  This cache is added to an array of
 444    cache and can be retrieved by lookup_file_in_cache_tab.  This
 445    function returns the created cache.  Note that only the last
 446    num_file_slots files are cached.
 447
 448    This can return nullptr if the FILE_PATH can't be opened for
 449    reading, or if the content can't be converted to the input_charset.  */
 450
 451 file_cache_slot*
 452 file_cache::add_file (const char *file_path)
 453 {
 454
 455   FILE *fp = fopen (file_path, "r");
 456   if (fp == NULL)
 457     return NULL;
 458
 459   unsigned highest_use_count = 0;
 460   file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
 461   if (!r->create (in_context, file_path, fp, highest_use_count))
 462     return NULL;
 463   return r;
 464 }
 465
 466 /* Get a borrowed char_span to the full content of this file
 467    as decoded according to the input charset, encoded as UTF-8.  */
 468
 469 char_span
 470 file_cache_slot::get_full_file_content ()
 471 {
 472   char *line;
 473   ssize_t line_len;
 474   while (get_next_line (&line, &line_len))
 475     {
 476     }
 477   return char_span (m_data, m_nb_read);
 478 }
 479
 480 /* Populate this slot for use on FILE_PATH and FP, dropping any
 481    existing cached content within it.  */
 482
 483 bool
 484 file_cache_slot::create (const file_cache::input_context &in_context,
 485                          const char *file_path, FILE *fp,
 486                          unsigned highest_use_count)
 487 {
 488   m_file_path = file_path;
 489   if (m_fp)
 490     fclose (m_fp);
 491   m_fp = fp;
 492   if (m_alloc_offset)
 493     offset_buffer (-m_alloc_offset);
 494   m_nb_read = 0;
 495   m_line_start_idx = 0;
 496   m_line_num = 0;
 497   m_line_record.truncate (0);
 498   /* Ensure that this cache entry doesn't get evicted next time
 499      add_file_to_cache_tab is called.  */
 500   m_use_count = ++highest_use_count;
 501   m_total_lines = total_lines_num (file_path);
 502   m_missing_trailing_newline = true;
 503
 504
 505   /* Check the input configuration to determine if we need to do any
 506      transformations, such as charset conversion or BOM skipping.  */
 507   if (const char *input_charset = in_context.ccb (file_path))
 508     {
 509       /* Need a full-blown conversion of the input charset.  */
 510       fclose (m_fp);
 511       m_fp = NULL;
 512       const cpp_converted_source cs
 513         = cpp_get_converted_source (file_path, input_charset);
 514       if (!cs.data)
 515         return false;
 516       if (m_data)
 517         XDELETEVEC (m_data);
 518       m_data = cs.data;
 519       m_nb_read = m_size = cs.len;
 520       m_alloc_offset = cs.data - cs.to_free;
 521     }
 522   else if (in_context.should_skip_bom)
 523     {
 524       if (read_data ())
 525         {
 526           const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
 527           offset_buffer (offset);
 528           m_nb_read -= offset;
 529         }
 530     }
 531
 532   return true;
 533 }
 534
 535 /* file_cache's ctor.  */
 536
 537 file_cache::file_cache ()
 538 : m_file_slots (new file_cache_slot[num_file_slots])
 539 {
 540   initialize_input_context (nullptr, false);
 541 }
 542
/* file_cache's dtor.  Destroy the array of slots allocated by the
   constructor; each slot's own destructor releases its resources.  */

file_cache::~file_cache ()
{
  delete[] m_file_slots;
}
 549
 550 /* Lookup the cache used for the content of a given file accessed by
 551    caret diagnostic.  If no cached file was found, create a new cache
 552    for this file, add it to the array of cached file and return
 553    it.
 554
 555    This can return nullptr on a cache miss if FILE_PATH can't be opened for
 556    reading, or if the content can't be converted to the input_charset.  */
 557
 558 file_cache_slot*
 559 file_cache::lookup_or_add_file (const char *file_path)
 560 {
 561   file_cache_slot *r = lookup_file (file_path);
 562   if (r == NULL)
 563     r = add_file (file_path);
 564   return r;
 565 }
 566
 567 /* Default constructor for a cache of file used by caret
 568    diagnostic.  */
 569
 570 file_cache_slot::file_cache_slot ()
 571 : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
 572   m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
 573   m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true)
 574 {
 575   m_line_record.create (0);
 576 }
 577
 578 /* Destructor for a cache of file used by caret diagnostic.  */
 579
 580 file_cache_slot::~file_cache_slot ()
 581 {
 582   if (m_fp)
 583     {
 584       fclose (m_fp);
 585       m_fp = NULL;
 586     }
 587   if (m_data)
 588     {
 589       offset_buffer (-m_alloc_offset);
 590       XDELETEVEC (m_data);
 591       m_data = 0;
 592     }
 593   m_line_record.release ();
 594 }
 595
 596 /* Returns TRUE iff the cache would need to be filled with data coming
 597    from the file.  That is, either the cache is empty or full or the
 598    current line is empty.  Note that if the cache is full, it would
 599    need to be extended and filled again.  */
 600
 601 bool
 602 file_cache_slot::needs_read_p () const
 603 {
 604   return m_fp && (m_nb_read == 0
 605           || m_nb_read == m_size
 606           || (m_line_start_idx >= m_nb_read - 1));
 607 }
 608
 609 /*  Return TRUE iff the cache is full and thus needs to be
 610     extended.  */
 611
 612 bool
 613 file_cache_slot::needs_grow_p () const
 614 {
 615   return m_nb_read == m_size;
 616 }
 617
 618 /* Grow the cache if it needs to be extended.  */
 619
 620 void
 621 file_cache_slot::maybe_grow ()
 622 {
 623   if (!needs_grow_p ())
 624     return;
 625
 626   if (!m_data)
 627     {
 628       gcc_assert (m_size == 0 && m_alloc_offset == 0);
 629       m_size = buffer_size;
 630       m_data = XNEWVEC (char, m_size);
 631     }
 632   else
 633     {
 634       const int offset = m_alloc_offset;
 635       offset_buffer (-offset);
 636       m_size *= 2;
 637       m_data = XRESIZEVEC (char, m_data, m_size);
 638       offset_buffer (offset);
 639     }
 640 }
 641
 642 /*  Read more data into the cache.  Extends the cache if need be.
 643     Returns TRUE iff new data could be read.  */
 644
 645 bool
 646 file_cache_slot::read_data ()
 647 {
 648   if (feof (m_fp) || ferror (m_fp))
 649     return false;
 650
 651   maybe_grow ();
 652
 653   char * from = m_data + m_nb_read;
 654   size_t to_read = m_size - m_nb_read;
 655   size_t nb_read = fread (from, 1, to_read, m_fp);
 656
 657   if (ferror (m_fp))
 658     return false;
 659
 660   m_nb_read += nb_read;
 661   return !!nb_read;
 662 }
 663
 664 /* Read new data iff the cache needs to be filled with more data
 665    coming from the file FP.  Return TRUE iff the cache was filled with
 666    mode data.  */
 667
 668 bool
 669 file_cache_slot::maybe_read_data ()
 670 {
 671   if (!needs_read_p ())
 672     return false;
 673   return read_data ();
 674 }
 675
 676 /* Helper function for file_cache_slot::get_next_line (), to find the end of
 677    the next line.  Returns with the memchr convention, i.e. nullptr if a line
 678    terminator was not found.  We need to determine line endings in the same
 679    manner that libcpp does: any of \n, \r\n, or \r is a line ending.  */
 680
 681 static char *
 682 find_end_of_line (char *s, size_t len)
 683 {
 684   for (const auto end = s + len; s != end; ++s)
 685     {
 686       if (*s == '\n')
 687         return s;
 688       if (*s == '\r')
 689         {
 690           const auto next = s + 1;
 691           if (next == end)
 692             {
 693               /* Don't find the line ending if \r is the very last character
 694                  in the buffer; we do not know if it's the end of the file or
 695                  just the end of what has been read so far, and we wouldn't
 696                  want to break in the middle of what's actually a \r\n
 697                  sequence.  Instead, we will handle the case of a file ending
 698                  in a \r later.  */
 699               break;
 700             }
 701           return (*next == '\n' ? next : s);
 702         }
 703     }
 704   return nullptr;
 705 }
 706
/* Read the next line of the file cached in this slot, pulling more
   data from the underlying stream when needed.  Upon successful
   completion, *LINE is set to the beginning of the line found.
   *LINE points directly into the slot's buffer and is only valid
   until the next call of get_next_line, which may invalidate it.
   *LINE_LEN is set to the length of the line, measured up to (and
   excluding) the terminator located by find_end_of_line.  Note that
   for a "\r\n" ending that terminator is the '\n', so the '\r' is
   counted in *LINE_LEN.

   This function returns true if a line was produced, and false if
   there is no more data to process or the stream is in error.  */

bool
file_cache_slot::get_next_line (char **line, ssize_t *line_len)
{
  /* Fill the cache with data to process.  */
  maybe_read_data ();

  size_t remaining_size = m_nb_read - m_line_start_idx;
  if (remaining_size == 0)
    /* There is no more data to process.  */
    return false;

  char *line_start = m_data + m_line_start_idx;

  /* Stays NULL when the line runs to the end of the data without a
     terminator being found.  */
  char *next_line_start = NULL;
  size_t len = 0;
  char *line_end = find_end_of_line (line_start, remaining_size);
  if (line_end == NULL)
    {
      /* We haven't found an end-of-line delimiter in the cache.
         Fill the cache with more data from the file and look again.
         The buffer may have been reallocated by the read, so
         LINE_START is recomputed each time.  */
      while (maybe_read_data ())
        {
          line_start = m_data + m_line_start_idx;
          remaining_size = m_nb_read - m_line_start_idx;
          line_end = find_end_of_line (line_start, remaining_size);
          if (line_end != NULL)
            {
              next_line_start = line_end + 1;
              break;
            }
        }
      if (line_end == NULL)
        {
          /* We've loaded all the file into the cache and still no
             terminator.  Let's say the line ends up at one byte past the
             end of the file.  This is to stay consistent with the case
             of when the line ends up with a terminator and line_end points to
             that.  That consistency is useful below in the len calculation.

             If the file ends in a \r, we didn't identify it as a line
             terminator above, so do that now instead.  */
          line_end = m_data + m_nb_read;
          if (m_nb_read && line_end[-1] == '\r')
            {
              --line_end;
              m_missing_trailing_newline = false;
            }
          else
            m_missing_trailing_newline = true;
        }
      else
        m_missing_trailing_newline = false;
    }
  else
    {
      next_line_start = line_end + 1;
      m_missing_trailing_newline = false;
    }

  if (m_fp && ferror (m_fp))
    return false;

  /* At this point, we've found the end of the line.  It either points to
     the line terminator or to one byte after the last byte of the file.  */
  gcc_assert (line_end != NULL);

  len = line_end - line_start;

  if (m_line_start_idx < m_nb_read)
    *line = line_start;

  ++m_line_num;

  /* Before we update our line record, make sure the hint about the
     total number of lines of the file is correct.  If it's not, then
     we give up recording line boundaries from now on.  */
  bool update_line_record = true;
  if (m_line_num > m_total_lines)
    update_line_record = false;

  /* Now update our line record so that re-reading lines from
     before m_line_start_idx is faster.  */
  if (update_line_record
      && m_line_record.length () < line_record_size)
    {
      /* If the file lines fit in the line record, we just record all
         its lines ...  */
      if (m_total_lines <= line_record_size
          && m_line_num > m_line_record.length ())
        m_line_record.safe_push
          (file_cache_slot::line_info (m_line_num,
                                       m_line_start_idx,
                                       line_end - m_data));
      else if (m_total_lines > line_record_size)
        {
          /* ... otherwise, we just scale total_lines down to
             line_record_size lines.  */
          size_t n = (m_line_num * line_record_size) / m_total_lines;
          if (m_line_record.length () == 0
              || n >= m_line_record.length ())
            m_line_record.safe_push
              (file_cache_slot::line_info (m_line_num,
                                           m_line_start_idx,
                                           line_end - m_data));
        }
    }

  /* Update m_line_start_idx so that it points to the next line to be
     read.  */
  if (next_line_start)
    m_line_start_idx = next_line_start - m_data;
  else
    /* We didn't find any line terminator.  Let's consider that the end
       of line is the end of the data in the cache.  The next
       invocation of get_next_line will either read more data from the
       underlying file or return false early because we've reached the
       end of the file.  */
    m_line_start_idx = m_nb_read;

  *line_len = len;

  return true;
}
 840
 841 /* Consume the next bytes coming from the cache (or from its
 842    underlying file if there are remaining unread bytes in the file)
 843    until we reach the next end-of-line (or end-of-file).  There is no
 844    copying from the cache involved.  Return TRUE upon successful
 845    completion.  */
 846
 847 bool
 848 file_cache_slot::goto_next_line ()
 849 {
 850   char *l;
 851   ssize_t len;
 852
 853   return get_next_line (&l, &len);
 854 }
 855
/* Read an arbitrary line number LINE_NUM (counted from 1) from the
   file cached in this slot.  If the line was read successfully, *LINE
   points to the beginning of the line in the file cache and *LINE_LEN
   is the length of the line.  *LINE is not nul-terminated, but may
   contain zero bytes.  *LINE is only valid until the next call of
   read_line_num.
   This function returns true iff a line was read.  */

bool
file_cache_slot::read_line_num (size_t line_num,
                       char ** line, ssize_t *line_len)
{
  gcc_assert (line_num > 0);

  if (line_num <= m_line_num)
    {
      /* We've been asked to read lines that are before m_line_num.
         So let's use our line record (if it's not empty) to try to
         avoid re-reading the file from the beginning again.  */

      if (m_line_record.is_empty ())
        {
          /* Nothing recorded: restart from the top of the data.  */
          m_line_start_idx = 0;
          m_line_num = 0;
        }
      else
        {
          /* The recorded line to restart from, if a usable one is
             found.  */
          file_cache_slot::line_info *i = NULL;
          if (m_total_lines <= line_record_size)
            {
              /* In languages where the input file is not totally
                 preprocessed up front, the m_total_lines hint
                 can be smaller than the number of lines of the
                 file.  In that case, only the first
                 m_total_lines have been recorded.

                 Otherwise, the first m_total_lines we've read have
                 their start/end recorded here.  */
              i = (line_num <= m_total_lines)
                ? &m_line_record[line_num - 1]
                : &m_line_record[m_total_lines - 1];
              gcc_assert (i->line_num <= line_num);
            }
          else
            {
              /*  So the file had more lines than our line record
                  size.  Thus the number of lines we've recorded has
                  been scaled down to line_record_size.  Let's
                  pick the start/end of the recorded line that is
                  closest to line_num.  */
              size_t n = (line_num <= m_total_lines)
                ? line_num * line_record_size / m_total_lines
                : m_line_record.length () - 1;
              if (n < m_line_record.length ())
                {
                  i = &m_line_record[n];
                  gcc_assert (i->line_num <= line_num);
                }
            }

          if (i && i->line_num == line_num)
            {
              /* We have the start/end of the line.  */
              *line = m_data + i->start_pos;
              *line_len = i->end_pos - i->start_pos;
              return true;
            }

          if (i)
            {
              /* Resume scanning at the recorded line, which is at or
                 before the one requested.  */
              m_line_start_idx = i->start_pos;
              m_line_num = i->line_num - 1;
            }
          else
            {
              /* No usable record: restart from the top of the data.  */
              m_line_start_idx = 0;
              m_line_num = 0;
            }
        }
    }

  /*  Let's walk from line m_line_num up to line_num - 1, without
      copying any line.  */
  while (m_line_num < line_num - 1)
    if (!goto_next_line ())
      return false;

  /* The line we want is the next one.  Let's read it and hand it back
     to the caller.  */
  return get_next_line (line, line_len);
}
 946
 947 /* Return the physical source line numbered LINE of the file FILE_PATH.
 948    The text is not NUL-terminated and may itself contain NUL characters,
 949    so callers must rely on the length carried by the returned char_span.
 950    The returned pointer is only valid until the next call of
 951    location_get_source_line.
 952    On failure, a NULL char_span is returned.  */
 953
 954 char_span
 955 file_cache::get_source_line (const char *file_path, int line)
 956 {
 957   /* Line 0 and a missing path cannot name a source line.  */
 958   if (line == 0 || file_path == NULL)
 959     return char_span (NULL, 0);
 960
 961   /* Find the cache slot for the file, or give up if there is none.  */
 962   file_cache_slot *slot = lookup_or_add_file (file_path);
 963   if (slot == NULL)
 964     return char_span (NULL, 0);
 965
 966   /* Have the slot locate the line; it reports the start and length.  */
 967   char *text = NULL;
 968   ssize_t text_len;
 969   if (!slot->read_line_num (line, &text, &text_len))
 970     return char_span (NULL, 0);
 971
 972   /* Wrap the result without copying it; see the validity caveat
 973      above.  */
 974   return char_span (text, text_len);
 975 }
 976
 977 char_span /* Source line LINE of FILE_PATH, via global_dc's file cache.  */
 978 location_get_source_line (const char *file_path, int line)
 979 {
 980   diagnostic_file_cache_init ();  /* Called before m_file_cache is used.  */
 981   return global_dc->m_file_cache->get_source_line (file_path, line);
 982 }
 983
 984 /* Return a NUL-terminated copy of the source text between two locations:
 985    from the start of START to the finish of END, inclusive.  Return NULL if
 986    the arguments are invalid.  The caller must free the return value.  */
 987
 988 char *
 989 get_source_text_between (location_t start, location_t end)
 990 {
 991   expanded_location expstart =
 992     expand_location_to_spelling_point (start, LOCATION_ASPECT_START);
 993   expanded_location expend =
 994     expand_location_to_spelling_point (end, LOCATION_ASPECT_FINISH);
 995
 996   /* If the locations are in different files or the end comes before the
 997      start, give up and return nothing.  */
 998   if (!expstart.file || !expend.file
 999       || strcmp (expstart.file, expend.file) != 0)
1000     return NULL;
1001   if (expstart.line > expend.line
1002       || (expstart.line == expend.line && expstart.column > expend.column))
1003     return NULL;
1004   /* These aren't real column numbers, give up.  */
1005   if (expstart.column == 0 || expend.column == 0)
1006     return NULL;
1007
1008   /* For a single line we need to trim both edges.  */
1009   if (expstart.line == expend.line)
1010     {
1011       char_span line = location_get_source_line (expstart.file, expstart.line);
1012       if (line.length () < 1 || line.length () < (size_t) expend.column)
1013         return NULL;
1014       int s = expstart.column - 1;
1015       int len = expend.column - s;
1016       return line.subspan (s, len).xstrdup ();
1017     }
1018
1019   struct obstack buf_obstack;
1020   obstack_init (&buf_obstack);
1021   bool ok = true;  /* Cleared if a column lies beyond its line.  */
1022
1023   /* Loop through all lines in the range and append each to buf; may trim
1024      parts of the start and end lines off depending on column values.  */
1025   for (int lnum = expstart.line; lnum <= expend.line; ++lnum)
1026     {
1027       char_span line = location_get_source_line (expstart.file, lnum);
1028       if (line.length () < 1 && (lnum != expstart.line && lnum != expend.line))
1029         continue;
1030
1031       /* For the first line in the range, only start at expstart.column */
1032       if (lnum == expstart.line)
1033         {
1034           unsigned off = expstart.column - 1;
1035           ok = line.length () >= off;
1036           if (!ok)
1037             break;
1038           line = line.subspan (off, line.length() - off);
1039         }
1040       /* For the last line, don't go past expend.column */
1041       else if (lnum == expend.line)
1042         {
1043           ok = line.length () >= (size_t) expend.column;
1044           if (!ok)
1045             break;
1046           line = line.subspan (0, expend.column);
1047         }
1048
1049       /* Combine spaces at the beginning of later lines.  */
1050       if (lnum > expstart.line)
1051         {
1052           unsigned off;
1053           for (off = 0; off < line.length(); ++off)
1054             if (line[off] != ' ' && line[off] != '\t')
1055               break;
1056           if (off > 0)
1057             {
1058               obstack_1grow (&buf_obstack, ' ');
1059               line = line.subspan (off, line.length() - off);
1060             }
1061         }
1062
1063       /* This does not include any trailing newlines.  */
1064       obstack_grow (&buf_obstack, line.get_buffer (), line.length ());
1065     }
1066
1067   /* NUL-terminate and finish the buf obstack, copy the text out unless a
1068      column was out of range, then release the obstack on every path.  */
1069   obstack_1grow (&buf_obstack, 0);
1070   const char *buf = (const char *) obstack_finish (&buf_obstack);
1071   char *result = ok ? xstrdup (buf) : NULL;
1072   obstack_free (&buf_obstack, NULL);
1073   return result;
1074 }
1075
1076 /* Full content of FILE_PATH; a null char_span if it has no cache slot.  */
1077 char_span
1078 file_cache::get_source_file_content (const char *file_path)
1079 {
1080   file_cache_slot *slot = lookup_or_add_file (file_path);
1081   if (slot != nullptr)
1082     return slot->get_full_file_content ();
1083   return char_span (nullptr, 0);
1084 }
1085
1086
1087 /* Get a borrowed char_span to the full content of FILE_PATH (looked up
1088    via global_dc's file cache), decoded per the input charset as UTF-8.  */
1089
1090 char_span
1091 get_source_file_content (const char *file_path)
1092 {
1093   diagnostic_file_cache_init ();
1094   return global_dc->m_file_cache->get_source_file_content (file_path);
1095 }
1096
1097 /* Report whether the final line of FILE_PATH lacks a trailing newline.
1098    The answer is only meaningful once the whole file has been loaded,
1099    e.g. by requesting a line number beyond the end of the file.  */
1100
1101 bool
1102 location_missing_trailing_newline (const char *file_path)
1103 {
1104   diagnostic_file_cache_init ();
1105
1106   file_cache_slot *slot
1107     = global_dc->m_file_cache->lookup_or_add_file (file_path);
1108
1109   /* No slot for the file means there is nothing to report.  */
1110   return slot != NULL && slot->missing_trailing_newline_p ();
1111 }
1112
1113 /* Return true if LOC (possibly a virtual location) has as its
1114    spelling location that of a built-in token, i.e. LOC is the
1115    location of a built-in token appearing in the expansion list of a
1116    macro.  This also holds for tokens that result from built-in
1117    tokens: e.g. the token "4" produced by expanding the built-in
1118    __LINE__ macro yields true.  */
1119 bool
1120 is_location_from_builtin_token (location_t loc)
1121 {
1122   const line_map_ordinary *spelling_map = NULL;
1123   const location_t spelling_loc
1124     = linemap_resolve_location (line_table, loc,
1125                                 LRK_SPELLING_LOCATION, &spelling_map);
1126   return spelling_loc == BUILTINS_LOCATION;
1127 }
1128
1129 /* Expand the source location LOC into a human readable location.  A
1130    virtual LOC is resolved to the expansion point of the macro involved.
1131    For a LOC that resolves to a builtin location, the file name of the
1132    result is set to the string "<built-in>".  */
1133
1134 expanded_location
1135 expand_location (location_t loc)
1136 {
1137   const bool expansion_point_p = true;
1138   return expand_location_1 (loc, expansion_point_p, LOCATION_ASPECT_CARET);
1139 }
1140
1141 /* Expand the source location LOC into a human readable location for
1142    aspect ASPECT.  Unlike expand_location, expand_location_1 is told not
1143    to use the macro expansion point (presumably giving the spelling
1144    point).  A builtin location gets the file name "<built-in>".  */
1145
1146 expanded_location
1147 expand_location_to_spelling_point (location_t loc,
1148                                    enum location_aspect aspect)
1149 {
1150   const bool expansion_point_p = false;
1151   return expand_location_1 (loc, expansion_point_p, aspect);
1152 }
1153
1154 /* libcpp's rich_location class needs to expand location_t instances
1155    and expects client code to supply a symbol named
1156      linemap_client_expand_location_to_spelling_point
1157    for that purpose.
1158
1159    This is the implementation for libcommon.a (all host binaries); it
1160    simply forwards to expand_location_1.  */
1161
1162 expanded_location
1163 linemap_client_expand_location_to_spelling_point (location_t loc,
1164                                                   enum location_aspect aspect)
1165 {
1166   const bool expansion_point_p = false;
1167   return expand_location_1 (loc, expansion_point_p, aspect);
1168 }
1169
1170
1171 /* If LOCATION is in a system header and is a virtual location for a
1172    token coming from a macro expansion, unwind it to the expansion point
1173    of the macro and return that point.  If the expansion point is also
1174    in a system header, or LOCATION is not in one at all, return LOCATION.
1175
1176    This helps when diagnosing a token located in a macro that is itself
1177    defined in a system header, for example the NULL macro.  Passing such
1178    a LOCATION directly to diagnostic functions such as warning_at would
1179    get the diagnostic suppressed (unless -Wsystem-headers is
1180    given).  */
1181
1182 location_t
1183 expansion_point_location_if_in_system_header (location_t location)
1184 {
1185   if (in_system_header_at (location))
1186     {
1187       const location_t expansion_point
1188         = linemap_resolve_location (line_table, location,
1189                                     LRK_MACRO_EXPANSION_POINT, NULL);
1190       if (!in_system_header_at (expansion_point))
1191         return expansion_point;
1192     }
1193   return location;
1194 }
1195
1196 /* If LOCATION is a virtual location for a token coming from a macro
1197    expansion, unwind it to the location of that macro's expansion point.  */
1198
1199 location_t
1200 expansion_point_location (location_t location)
1201 {
1202   return linemap_resolve_location (line_table, location,
1203                                    LRK_MACRO_EXPANSION_POINT, NULL);
1204 }
1205
1206 /* Construct a location with caret at CARET, ranging from START to
1207    FINISH, by delegating to the make_location method of line_table.
1208
1209    For example, consider:
1210
1211                  11111111112
1212         12345678901234567890
1213      522
1214      523   return foo + bar;
1215                   ~~~~^~~~~
1216      524
1217
1218    The location's caret is at the "+", line 523 column 15, but it starts
1219    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
1220    of "bar" at column 19.  */
1221
1222 location_t
1223 make_location (location_t caret, location_t start, location_t finish)
1224 {
1225   return line_table->make_location (caret, start, finish);
1226 }
1227
1228 /* Variant of make_location taking the range as a source_range SRC_RANGE:
1229    CARET is reduced with get_pure_location and combined with SRC_RANGE.  */
1230
1231 location_t
1232 make_location (location_t caret, source_range src_range)
1233 {
1234   return line_table->get_or_create_combined_loc (get_pure_location (caret),
1235                                                  src_range, nullptr, 0);
1236 }
1237
1238 /* Convert the column of EXPLOC, which an expanded_location stores in
1239    byte units, to display units under POLICY.  Doing so requires reading
1240    the associated source line to calculate the display width; whenever
1241    that is not possible the byte column is returned as a fallback.  */
1242 int
1243 location_compute_display_column (expanded_location exploc,
1244                                  const cpp_char_column_policy &policy)
1245 {
1246   if (!exploc.file || !*exploc.file || !exploc.line || !exploc.column)
1247     return exploc.column;
1248   char_span src_line = location_get_source_line (exploc.file, exploc.line);
1249   /* If SRC_LINE is NULL, exploc.column comes back from the call below,
1250      which is the desired fallback.  */
1251   return cpp_byte_column_to_display_column (src_line.get_buffer (),
1252                                             src_line.length (),
1253                                             exploc.column, policy); }
1254
1255 /* Print to stderr statistics about the memory used by the line maps
1256    of line_table, as gathered by linemap_get_statistics, together with
1257    some statistics about macro expansion.  */
1258
1259 void
1260 dump_line_table_statistics (void)
1261 {
1262   /* Start from zeroed counters and let linemap_get_statistics fill
1263      them in for line_table.  */
1264   struct linemap_stats stats;
1265   memset (&stats, 0, sizeof (stats));
1266   linemap_get_statistics (line_table, &stats);
1267
1268   /* Derived totals: macro_maps_locations_size counts towards each.  */
1269   const long macro_maps_size
1270     = stats.macro_maps_used_size + stats.macro_maps_locations_size;
1271
1272   const long total_allocated_map_size
1273     = (stats.ordinary_maps_allocated_size
1274        + stats.macro_maps_allocated_size
1275        + stats.macro_maps_locations_size);
1276
1277   const long total_used_map_size
1278     = (stats.ordinary_maps_used_size
1279        + stats.macro_maps_used_size
1280        + stats.macro_maps_locations_size);
1281
1282   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
1283            stats.num_expanded_macros);
1284   if (stats.num_expanded_macros != 0)
1285     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
1286              stats.num_macro_tokens / stats.num_expanded_macros);
1287   fprintf (stderr,
1288            "\nLine Table allocations during the "
1289            "compilation process\n");
1290   fprintf (stderr, "Number of ordinary maps used:        " PRsa (5) "\n",
1291            SIZE_AMOUNT (stats.num_ordinary_maps_used));
1292   fprintf (stderr, "Ordinary map used size:              " PRsa (5) "\n",
1293            SIZE_AMOUNT (stats.ordinary_maps_used_size));
1294   fprintf (stderr, "Number of ordinary maps allocated:   " PRsa (5) "\n",
1295            SIZE_AMOUNT (stats.num_ordinary_maps_allocated));
1296   fprintf (stderr, "Ordinary maps allocated size:        " PRsa (5) "\n",
1297            SIZE_AMOUNT (stats.ordinary_maps_allocated_size));
1298   fprintf (stderr, "Number of macro maps used:           " PRsa (5) "\n",
1299            SIZE_AMOUNT (stats.num_macro_maps_used));
1300   fprintf (stderr, "Macro maps used size:                " PRsa (5) "\n",
1301            SIZE_AMOUNT (stats.macro_maps_used_size));
1302   fprintf (stderr, "Macro maps locations size:           " PRsa (5) "\n",
1303            SIZE_AMOUNT (stats.macro_maps_locations_size));
1304   fprintf (stderr, "Macro maps size:                     " PRsa (5) "\n",
1305            SIZE_AMOUNT (macro_maps_size));
1306   fprintf (stderr, "Duplicated maps locations size:      " PRsa (5) "\n",
1307            SIZE_AMOUNT (stats.duplicated_macro_maps_locations_size));
1308   fprintf (stderr, "Total allocated maps size:           " PRsa (5) "\n",
1309            SIZE_AMOUNT (total_allocated_map_size));
1310   fprintf (stderr, "Total used maps size:                " PRsa (5) "\n",
1311            SIZE_AMOUNT (total_used_map_size));
1312   fprintf (stderr, "Ad-hoc table size:                   " PRsa (5) "\n",
1313            SIZE_AMOUNT (stats.adhoc_table_size));
1314   fprintf (stderr, "Ad-hoc table entries used:           " PRsa (5) "\n",
1315            SIZE_AMOUNT (stats.adhoc_table_entries_used));
1316   fprintf (stderr, "optimized_ranges:                    " PRsa (5) "\n",
1317            SIZE_AMOUNT (line_table->m_num_optimized_ranges));
1318   fprintf (stderr, "unoptimized_ranges:                  " PRsa (5) "\n",
1319            SIZE_AMOUNT (line_table->m_num_unoptimized_ranges));
1320
1321   fprintf (stderr, "\n");
1322 }
1323
1324 /* Get location one beyond the final location in ordinary map IDX of SET.  */
1325
1326 static location_t
1327 get_end_location (class line_maps *set, unsigned int idx)
1328 {
1329   /* Every map except the last one ends where its successor begins.  */
1330   if (idx != LINEMAPS_ORDINARY_USED (set) - 1)
1331     return MAP_START_LOCATION (LINEMAPS_ORDINARY_MAP_AT (set, idx + 1));
1332
1333   return set->highest_location;
1334 }
1335
1336 /* Helper for write_digit_row: write the units digit of DIGIT to STREAM.  */
1337
1338 static void
1339 write_digit (FILE *stream, int digit)
1340 {
1341   fputc ('0' + (digit % 10), stream);  /* Assumes DIGIT >= 0.  */
1342 }
1343
1344 /* Helper function for dump_location_info.
1345    Write to STREAM a row of digits numbering a source line: one digit
1346    (units, tens, hundreds etc, as selected by DIVISOR) per column.  */
1347
1348 static void
1349 write_digit_row (FILE *stream, int indent,
1350                  const line_map_ordinary *map,
1351                  location_t loc, int max_col, int divisor)
1352 {
1353   /* Left margin of INDENT columns, then the row's opening bar.  */
1354   fprintf (stream, "%*c", indent, ' ');
1355   fprintf (stream, "|");
1356   /* One digit per column: the location_t of that column in the line
1357      starting at LOC, divided by DIVISOR.  */
1358   for (int col = 1; col < max_col; ++col)
1359     write_digit (stream, (loc + (col << map->m_range_bits)) / divisor);
1360   fprintf (stream, "\n");
1361 }
1362
1363 /* Write the interval of location_t values that is closed at START and
1364    open at END (START <= loc < END) to STREAM.  */
1365
1366 static void
1367 dump_location_range (FILE *stream,
1368                      location_t start, location_t end)
1369 {
1370   fprintf (stream,
1371            "  location_t interval: %u <= loc < %u\n",
1372            start, end);
1373 }
1374
1375 /* Write NAME on a line of its own to STREAM, followed by the interval
1376    START <= loc < END (see dump_location_range) and a blank line.  */
1377
1378 static void
1379 dump_labelled_location_range (FILE *stream,
1380                               const char *name,
1381                               location_t start, location_t end)
1382 {
1383   fprintf (stream, "%s\n", name);
1384   dump_location_range (stream, start, end);
1385   fprintf (stream, "\n");
1386 }
1387
1388 /* Write a visualization of the location_t ranges of line_table to STREAM:
1389    reserved, ordinary maps, unallocated, macro maps, and ad-hoc.  */
1390 void
1391 dump_location_info (FILE *stream)
1392 {
1393   /* Visualize the reserved locations.  */
1394   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1395                                 0, RESERVED_LOCATION_COUNT);
1396
1397   /* Visualize the ordinary line_map instances, rendering the sources. */
1398   for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1399     {
1400       location_t end_location = get_end_location (line_table, idx);
1401       /* half-open: END_LOCATION itself is not part of this map. */
1402
1403       const line_map_ordinary *map
1404         = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1405       fprintf (stream, "ORDINARY MAP: %u\n", idx);
1406       dump_location_range (stream,
1407                            MAP_START_LOCATION (map), end_location);
1408       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1409       fprintf (stream, "  starting at line: %i\n",
1410                ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1411       fprintf (stream, "  column and range bits: %i\n",
1412                map->m_column_and_range_bits);
1413       fprintf (stream, "  column bits: %i\n",
1414                map->m_column_and_range_bits - map->m_range_bits);
1415       fprintf (stream, "  range bits: %i\n",
1416                map->m_range_bits);
1417       const char * reason;
1418       switch (map->reason) {
1419       case LC_ENTER:
1420         reason = "LC_ENTER";
1421         break;
1422       case LC_LEAVE:
1423         reason = "LC_LEAVE";
1424         break;
1425       case LC_RENAME:
1426         reason = "LC_RENAME";
1427         break;
1428       case LC_RENAME_VERBATIM:
1429         reason = "LC_RENAME_VERBATIM";
1430         break;
1431       case LC_ENTER_MACRO:
1432         reason = "LC_ENTER_MACRO";
1433         break;
1434       default:
1435         reason = "Unknown";
1436       }
1437       fprintf (stream, "  reason: %d (%s)\n", map->reason, reason);
1438
1439       const line_map_ordinary *includer_map
1440         = linemap_included_from_linemap (line_table, map);
1441       fprintf (stream, "  included from location: %d",
1442                linemap_included_from (map));
1443       if (includer_map) {
1444         fprintf (stream, " (in ordinary map %d)",
1445                  int (includer_map - line_table->info_ordinary.maps));
1446       }
1447       fprintf (stream, "\n");
1448
1449       /* Render the span of source lines that this "map" covers.  */
1450       for (location_t loc = MAP_START_LOCATION (map);
1451            loc < end_location;
1452            loc += (1 << map->m_range_bits) )
1453         {
1454           gcc_assert (pure_location_p (line_table, loc) );
1455
1456           expanded_location exploc
1457             = linemap_expand_location (line_table, map, loc);
1458
1459           if (exploc.column == 0)
1460             {
1461               /* Beginning of a new source line: draw the line.  */
1462
1463               char_span line_text = location_get_source_line (exploc.file,
1464                                                               exploc.line);
1465               if (!line_text)
1466                 break;
1467               fprintf (stream,
1468                        "%s:%3i|loc:%5u|%.*s\n",
1469                        exploc.file, exploc.line,
1470                        loc,
1471                        (int)line_text.length (), line_text.get_buffer ());
1472
1473               /* "loc" is at column 0, which means "the whole line".
1474                  Render the locations *within* the line, by underlining
1475                  it, showing the location_t numeric values
1476                  at each column.  */
1477               size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1478               if (max_col > line_text.length ())
1479                 max_col = line_text.length () + 1;
1480
1481               int len_lnum = num_digits (exploc.line);
1482               if (len_lnum < 3)
1483                 len_lnum = 3;
1484               int len_loc = num_digits (loc);
1485               if (len_loc < 5)
1486                 len_loc = 5;
1487
1488               int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1489
1490               /* Thousands.  */
1491               if (end_location > 999)
1492                 write_digit_row (stream, indent, map, loc, max_col, 1000);
1493
1494               /* Hundreds.  */
1495               if (end_location > 99)
1496                 write_digit_row (stream, indent, map, loc, max_col, 100);
1497
1498               /* Tens.  */
1499               write_digit_row (stream, indent, map, loc, max_col, 10);
1500
1501               /* Units.  */
1502               write_digit_row (stream, indent, map, loc, max_col, 1);
1503             }
1504         }
1505       fprintf (stream, "\n");
1506     }
1507
1508   /* Visualize unallocated values.  */
1509   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1510                                 line_table->highest_location,
1511                                 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1512
1513   /* Visualize the macro line_map instances, rendering the sources. */
1514   for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1515     {
1516       /* Each macro map that is allocated owns location_t values
1517          that are *lower* than the one before them.
1518          Hence it's meaningful to view them either in order of ascending
1519          source locations, or in order of ascending macro map index.  */
1520       const bool ascending_location_ts = true;
1521       unsigned int idx = (ascending_location_ts
1522                           ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1523                           : i);
1524       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1525       fprintf (stream, "MACRO %u: %s (%u tokens)\n",
1526                idx,
1527                linemap_map_get_macro_name (map),
1528                MACRO_MAP_NUM_MACRO_TOKENS (map));
1529       dump_location_range (stream,
1530                            map->start_location,
1531                            (map->start_location
1532                             + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1533       inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1534               "expansion point is location %i",
1535               MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1536       fprintf (stream, "  map->start_location: %u\n",
1537                map->start_location);
1538
1539       fprintf (stream, "  macro_locations:\n");
1540       for (unsigned int tok = 0; tok < MACRO_MAP_NUM_MACRO_TOKENS (map); tok++)
1541         {
1542           location_t x = MACRO_MAP_LOCATIONS (map)[2 * tok];
1543           location_t y = MACRO_MAP_LOCATIONS (map)[(2 * tok) + 1];
1544
1545           /* linemap_add_macro_token encodes token numbers in an expansion
1546              by putting them after MAP_START_LOCATION. */
1547
1548           /* I'm typically seeing 4 uninitialized entries at the end of
1549              0xafafafaf.
1550              This appears to be due to macro.cc:replace_args
1551              adding 2 extra args for padding tokens; presumably there may
1552              be a leading and/or trailing padding token injected,
1553              each for 2 more location slots.
1554              This would explain there being up to 4 location_ts slots
1555              that may be uninitialized.  */
1556
1557           fprintf (stream, "    %u: %u, %u\n",
1558                    tok,
1559                    x,
1560                    y);
1561           if (x == y)
1562             {
1563               if (x < MAP_START_LOCATION (map))
1564                 inform (x, "token %u has %<x-location == y-location == %u%>",
1565                         tok, x);
1566               else
1567                 fprintf (stream,
1568                          "x-location == y-location == %u encodes token # %u\n",
1569                          x, x - MAP_START_LOCATION (map));
1570             }
1571           else
1572             {
1573               inform (x, "token %u has %<x-location == %u%>", tok, x);
1574               inform (x, "token %u has %<y-location == %u%>", tok, y);
1575             }
1576         }
1577       fprintf (stream, "\n");
1578     }
1579
1580   /* It appears that MAX_LOCATION_T itself is never assigned to a
1581      macro map, presumably due to an off-by-one error somewhere
1582      between the logic in linemap_enter_macro and
1583      LINEMAPS_MACRO_LOWEST_LOCATION.  */
1584   dump_labelled_location_range (stream, "MAX_LOCATION_T",
1585                                 MAX_LOCATION_T,
1586                                 MAX_LOCATION_T + 1);
1587
1588   /* Visualize ad-hoc values.  */
1589   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1590                                 MAX_LOCATION_T + 1, UINT_MAX);
1591 }
1592
1593 /* string_concat's constructor.  Takes a copy of the NUM locations in LOCS.  */
1594
1595 string_concat::string_concat (int num, location_t *locs)
1596   : m_num (num)
1597 {
1598   m_locs = ggc_vec_alloc <location_t> (num);  /* Filled for 0..NUM-1 below.  */
1599   for (int i = 0; i < num; i++)
1600     m_locs[i] = locs[i];
1601 }
1602
1603 /* string_concat_db's constructor.  Creates the table keyed by location.  */
1604
1605 string_concat_db::string_concat_db ()
1606 {
1607   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1608 }
1609
1610 /* Record that a concatenation of NUM string literal tokens occurred.
1611    LOCS is an array of NUM elements holding the locations of those
1612    tokens; a copy of LOCS is taken.  */
1613
1614 void
1615 string_concat_db::record_string_concatenation (int num, location_t *locs)
1616 {
1617   gcc_assert (num > 1);
1618   gcc_assert (locs);
1619
1620   /* The first token's location, canonicalized, is the lookup key.  */
1621   const location_t key = get_key_loc (locs[0]);
1622
1623   /* Nothing is recorded for 'RESERVED_LOCATION_P (key)' key values:
1624      data stored under such a key would be overwritten by a
1625      subsequent call that maps to the same key.  */
1626   if (!RESERVED_LOCATION_P (key))
1627     m_table->put (key,
1628                   new (ggc_alloc <string_concat> ())
1629                     string_concat (num, locs));
1630 }
1631
1632 /* Determine whether LOC was the location of the initial token of a
1633    concatenation of string literal tokens.
1634    If it was, store the number of tokens in *OUT_NUM and a pointer to
1635    the array of the tokens' locations in *OUT_LOCS, and return true.
1636    *OUT_LOCS is then a borrowed pointer to storage owned by the
1637    string_concat_db.
1638    Otherwise, return false.  */
1639
1640 bool
1641 string_concat_db::get_string_concatenation (location_t loc,
1642                                             int *out_num,
1643                                             location_t **out_locs)
1644 {
1645   gcc_assert (out_num);
1646   gcc_assert (out_locs);
1647
1648   /* Keys satisfying RESERVED_LOCATION_P are never recorded; see the
1649      discussion in 'string_concat_db::record_string_concatenation'.  */
1650   const location_t key = get_key_loc (loc);
1651   if (RESERVED_LOCATION_P (key))
1652     return false;
1653
1654   if (string_concat **slot = m_table->get (key))
1655     {
1656       *out_num = (*slot)->m_num;
1657       *out_locs = (*slot)->m_locs;
1658       return true;
1659     }
1660   return false;
1661 }
1662
1663 /* Internal function.  Canonicalize LOC into a form usable as a key
1664    within the database: strip away macro expansion, ad-hoc
1665    information, and range information, keeping the location of the
1666    start of LOC within an ordinary linemap.  */
1667
1668 location_t
1669 string_concat_db::get_key_loc (location_t loc)
1670 {
1671   const location_t spelling_loc
1672     = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1673                                 NULL);
1674
1675   /* Keep only the start of the resulting location's range.  */
1676   return get_range_from_loc (line_table, spelling_loc).m_start;
1677 }
1678
1679 /* Helper class for use within get_substring_ranges_for_loc.
1680    A vec of cpp_string that takes responsibility for freeing the
1681    str->text of each str in the vector when it is destroyed.  */
1682
1683 class auto_cpp_string_vec :  public auto_vec <cpp_string>
1684 {
1685  public:
1686   auto_cpp_string_vec (int alloc)
1687     : auto_vec <cpp_string> (alloc) {}
1688
1689   ~auto_cpp_string_vec ()
1690   {
1691     /* Free the text buffer of every element.  The text pointers
1692        are const-qualified (hence the const_cast), but this vec
1693        holds its own copies of them.  */
1694     for (unsigned idx = 0; idx < length (); idx++)
1695       free (const_cast <unsigned char *> ((*this)[idx].text));
1696   }
1697 };
1698
1699 /* Attempt to populate RANGES with source location information on the
1700    individual characters within the string literal found at STRLOC.
1701    If CONCATS is non-NULL, then any string literals that the token at
1702    STRLOC  was concatenated with are also added to RANGES.
1703
1704    Return NULL if successful, or an error message if any errors occurred (in
1705    which case RANGES may be only partially populated and should not
1706    be used).
1707
1708    This is implemented by re-parsing the relevant source line(s).  */
1709
1710 static const char *
1711 get_substring_ranges_for_loc (cpp_reader *pfile,
1712                               string_concat_db *concats,
1713                               location_t strloc,
1714                               enum cpp_ttype type,
1715                               cpp_substring_ranges &ranges)
1716 {
1717   gcc_assert (pfile);
1718
1719   if (strloc == UNKNOWN_LOCATION)
1720     return "unknown location";
1721
1722   /* Reparsing the strings requires accurate location information.
1723      If -ftrack-macro-expansion has been overridden from its default
1724      of 2, then we might have a location of a macro expansion point,
1725      rather than the location of the literal itself.
1726      Avoid this by requiring that we have full macro expansion tracking
1727      for substring locations to be available.  */
1728   if (cpp_get_options (pfile)->track_macro_expansion != 2)
1729     return "track_macro_expansion != 2";
1730
1731   /* If #line or # 44 "file"-style directives are present, then there's
1732      no guarantee that the line numbers we have can be used to locate
1733      the strings.  For example, we might have a .i file with # directives
1734      pointing back to lines within a .c file, but the .c file might
1735      have been edited since the .i file was created.
1736      In such a case, the safest course is to disable on-demand substring
1737      locations.  */
1738   if (line_table->seen_line_directive)
1739     return "seen line directive";
1740
1741   /* If string concatenation has occurred at STRLOC, get the locations
1742      of all of the literal tokens making up the compound string.
1743      Otherwise, just use STRLOC.  */
1744   int num_locs = 1;
1745   location_t *strlocs = &strloc;
1746   if (concats)
1747     concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1748
1749   auto_cpp_string_vec strs (num_locs);
1750   auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1751   for (int i = 0; i < num_locs; i++)
1752     {
1753       /* Get range of strloc.  We will use it to locate the start and finish
1754          of the literal token within the line.  */
1755       source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1756
1757       if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1758         {
1759           /* If the string token was within a macro expansion, then we can
1760              cope with it for the simple case where we have a single token.
1761              Otherwise, bail out.  */
1762           if (src_range.m_start != src_range.m_finish)
1763             return "macro expansion";
1764         }
1765       else
1766         {
1767           if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1768             /* If so, we can't reliably determine where the token started within
1769                its line.  */
1770             return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1771
1772           if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1773             /* If so, we can't reliably determine where the token finished
1774                within its line.  */
1775             return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1776         }
1777
1778       expanded_location start
1779         = expand_location_to_spelling_point (src_range.m_start,
1780                                              LOCATION_ASPECT_START);
1781       expanded_location finish
1782         = expand_location_to_spelling_point (src_range.m_finish,
1783                                              LOCATION_ASPECT_FINISH);
1784       if (start.file != finish.file)
1785         return "range endpoints are in different files";
1786       if (start.line != finish.line)
1787         return "range endpoints are on different lines";
1788       if (start.column > finish.column)
1789         return "range endpoints are reversed";
1790
1791       char_span line = location_get_source_line (start.file, start.line);
1792       if (!line)
1793         return "unable to read source line";
1794
1795       /* Determine the location of the literal (including quotes
1796          and leading prefix chars, such as the 'u' in a u""
1797          token).  */
1798       size_t literal_length = finish.column - start.column + 1;
1799
1800       /* Ensure that we don't crash if we got the wrong location.  */
1801       if (start.column < 1)
1802         return "zero start column";
1803       if (line.length () < (start.column - 1 + literal_length))
1804         return "line is not wide enough";
1805
1806       char_span literal = line.subspan (start.column - 1, literal_length);
1807
1808       cpp_string from;
1809       from.len = literal_length;
1810       /* Make a copy of the literal, to avoid having to rely on
1811          the lifetime of the copy of the line within the cache.
1812          This will be released by the auto_cpp_string_vec dtor.  */
1813       from.text = (unsigned char *)literal.xstrdup ();
1814       strs.safe_push (from);
1815
1816       /* For very long lines, a new linemap could have started
1817          halfway through the token.
1818          Ensure that the loc_reader uses the linemap of the
1819          *end* of the token for its start location.  */
1820       const line_map_ordinary *start_ord_map;
1821       linemap_resolve_location (line_table, src_range.m_start,
1822                                 LRK_SPELLING_LOCATION, &start_ord_map);
1823       const line_map_ordinary *final_ord_map;
1824       linemap_resolve_location (line_table, src_range.m_finish,
1825                                 LRK_SPELLING_LOCATION, &final_ord_map);
1826       if (start_ord_map == NULL || final_ord_map == NULL)
1827         return "failed to get ordinary maps";
1828       /* Bulletproofing.  We ought to only have different ordinary maps
1829          for start vs finish due to line-length jumps.  */
1830       if (start_ord_map != final_ord_map
1831           && start_ord_map->to_file != final_ord_map->to_file)
1832         return "start and finish are spelled in different ordinary maps";
1833       /* The file from linemap_resolve_location ought to match that from
1834          expand_location_to_spelling_point.  */
1835       if (start_ord_map->to_file != start.file)
1836         return "mismatching file after resolving linemap";
1837
1838       location_t start_loc
1839         = linemap_position_for_line_and_column (line_table, final_ord_map,
1840                                                 start.line, start.column);
1841
1842       cpp_string_location_reader loc_reader (start_loc, line_table);
1843       loc_readers.safe_push (loc_reader);
1844     }
1845
1846   /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
1847   const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1848                                                  loc_readers.address (),
1849                                                  num_locs, &ranges, type);
1850   if (err)
1851     return err;
1852
1853   /* Success: "ranges" should now contain information on the string.  */
1854   return NULL;
1855 }
1856
1857 /* Attempt to populate *OUT_LOC with source location information on the
1858    given characters within the string literal found at STRLOC.
1859    CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1860    character set.
1861
1862    For example, given CARET_IDX = 4, START_IDX = 3, END_IDX  = 7
1863    and string literal "012345\n789"
1864    *OUT_LOC is written to with:
1865      "012345\n789"
1866          ~^~~~~
1867
1868    If CONCATS is non-NULL, then any string literals that the token at
1869    STRLOC was concatenated with are also considered.
1870
1871    This is implemented by re-parsing the relevant source line(s).
1872
1873    Return NULL if successful, or an error message if any errors occurred.
1874    Error messages are intended for GCC developers (to help debugging) rather
1875    than for end-users.  */
1876
1877 const char *
1878 get_location_within_string (cpp_reader *pfile,
1879                             string_concat_db *concats,
1880                             location_t strloc,
1881                             enum cpp_ttype type,
1882                             int caret_idx, int start_idx, int end_idx,
1883                             location_t *out_loc)
1884 {
1885   gcc_checking_assert (caret_idx >= 0);
1886   gcc_checking_assert (start_idx >= 0);
1887   gcc_checking_assert (end_idx >= 0);
1888   gcc_assert (out_loc);
1889
1890   cpp_substring_ranges ranges;
1891   const char *err
1892     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1893   if (err)
1894     return err;
1895
1896   if (caret_idx >= ranges.get_num_ranges ())
1897     return "caret_idx out of range";
1898   if (start_idx >= ranges.get_num_ranges ())
1899     return "start_idx out of range";
1900   if (end_idx >= ranges.get_num_ranges ())
1901     return "end_idx out of range";
1902
1903   *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1904                             ranges.get_range (start_idx).m_start,
1905                             ranges.get_range (end_idx).m_finish);
1906   return NULL;
1907 }
1908
1909 /* Associate the DISCRIMINATOR with LOCUS, and return a new locus. */
1910
1911 location_t
1912 location_with_discriminator (location_t locus, int discriminator)
1913 {
1914   tree block = LOCATION_BLOCK (locus);
1915   source_range src_range = get_range_from_loc (line_table, locus);
1916   locus = get_pure_location (locus);
1917
1918   if (locus == UNKNOWN_LOCATION)
1919     return locus;
1920
1921   return line_table->get_or_create_combined_loc (locus, src_range, block,
1922                                                  discriminator);
1923 }
1924
1925 /* Return TRUE if LOCUS represents a location with a discriminator.  */
1926
1927 bool
1928 has_discriminator (location_t locus)
1929 {
1930   return get_discriminator_from_loc (locus) != 0;
1931 }
1932
1933 /* Return the discriminator for LOCUS.  */
1934
1935 int
1936 get_discriminator_from_loc (location_t locus)
1937 {
1938   return get_discriminator_from_loc (line_table, locus);
1939 }
1940
1941 #if CHECKING_P
1942
1943 namespace selftest {
1944
1945 /* Selftests of location handling.  */
1946
1947 /* Attempt to populate *OUT_RANGE with source location information on the
1948    given character within the string literal found at STRLOC.
1949    CHAR_IDX refers to an offset within the execution character set.
1950    If CONCATS is non-NULL, then any string literals that the token at
1951    STRLOC was concatenated with are also considered.
1952
1953    This is implemented by re-parsing the relevant source line(s).
1954
1955    Return NULL if successful, or an error message if any errors occurred.
1956    Error messages are intended for GCC developers (to help debugging) rather
1957    than for end-users.  */
1958
1959 static const char *
1960 get_source_range_for_char (cpp_reader *pfile,
1961                            string_concat_db *concats,
1962                            location_t strloc,
1963                            enum cpp_ttype type,
1964                            int char_idx,
1965                            source_range *out_range)
1966 {
1967   gcc_checking_assert (char_idx >= 0);
1968   gcc_assert (out_range);
1969
1970   cpp_substring_ranges ranges;
1971   const char *err
1972     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1973   if (err)
1974     return err;
1975
1976   if (char_idx >= ranges.get_num_ranges ())
1977     return "char_idx out of range";
1978
1979   *out_range = ranges.get_range (char_idx);
1980   return NULL;
1981 }
1982
1983 /* As get_source_range_for_char, but write to *OUT the number
1984    of ranges that are available.  */
1985
1986 static const char *
1987 get_num_source_ranges_for_substring (cpp_reader *pfile,
1988                                      string_concat_db *concats,
1989                                      location_t strloc,
1990                                      enum cpp_ttype type,
1991                                      int *out)
1992 {
1993   gcc_assert (out);
1994
1995   cpp_substring_ranges ranges;
1996   const char *err
1997     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1998
1999   if (err)
2000     return err;
2001
2002   *out = ranges.get_num_ranges ();
2003   return NULL;
2004 }
2005
2006 /* Selftests of location handling.  */
2007
2008 /* Verify that compare() on linenum_type handles comparisons over the full
2009    range of the type.  */
2010
2011 static void
2012 test_linenum_comparisons ()
2013 {
2014   linenum_type min_line (0);
2015   linenum_type max_line (0xffffffff);
2016   ASSERT_EQ (0, compare (min_line, min_line));
2017   ASSERT_EQ (0, compare (max_line, max_line));
2018
2019   ASSERT_GT (compare (max_line, min_line), 0);
2020   ASSERT_LT (compare (min_line, max_line), 0);
2021 }
2022
2023 /* Helper function for verifying location data: when location_t
2024    values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
2025    as having column 0.  */
2026
2027 static bool
2028 should_have_column_data_p (location_t loc)
2029 {
2030   if (IS_ADHOC_LOC (loc))
2031     loc = get_location_from_adhoc_loc (line_table, loc);
2032   if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
2033     return false;
2034   return true;
2035 }
2036
2037 /* Selftest for should_have_column_data_p.  */
2038
2039 static void
2040 test_should_have_column_data_p ()
2041 {
2042   ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
2043   ASSERT_TRUE
2044     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
2045   ASSERT_FALSE
2046     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
2047 }
2048
2049 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
2050    on LOC.  */
2051
2052 static void
2053 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
2054               location_t loc)
2055 {
2056   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
2057   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
2058   /* If location_t values are sufficiently high, then column numbers
2059      will be unavailable and LOCATION_COLUMN (loc) will be 0.
2060      When close to the threshold, column numbers *may* be present: if
2061      the final linemap before the threshold contains a line that straddles
2062      the threshold, locations in that line have column information.  */
2063   if (should_have_column_data_p (loc))
2064     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
2065 }
2066
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests in a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
   and others use zero
   - the fallback modes within line-map.cc: there are various threshold
   values for location_t beyond which line-map.cc changes behavior
   (disabling of the range-packing optimization, disabling of
   column-tracking).  We can exercise these by starting the line_table
   at interesting values at or near these thresholds.

   The following class describes one particular case within our test
   matrix: a value for default_range_bits, plus a base location at
   which to start the line_table.  */

class line_table_case
{
public:
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* Value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* Location at which to start the line_table.  */
  int m_base_location;
};
2094
2095 /* Constructor.  Store the old value of line_table, and create a new
2096    one, using sane defaults.  */
2097
2098 line_table_test::line_table_test ()
2099 {
2100   gcc_assert (saved_line_table == NULL);
2101   saved_line_table = line_table;
2102   line_table = ggc_alloc<line_maps> ();
2103   linemap_init (line_table, BUILTINS_LOCATION);
2104   gcc_assert (saved_line_table->m_reallocator);
2105   line_table->m_reallocator = saved_line_table->m_reallocator;
2106   gcc_assert (saved_line_table->m_round_alloc_size);
2107   line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
2108   line_table->default_range_bits = 0;
2109 }
2110
2111 /* Constructor.  Store the old value of line_table, and create a new
2112    one, using the sitation described in CASE_.  */
2113
2114 line_table_test::line_table_test (const line_table_case &case_)
2115 {
2116   gcc_assert (saved_line_table == NULL);
2117   saved_line_table = line_table;
2118   line_table = ggc_alloc<line_maps> ();
2119   linemap_init (line_table, BUILTINS_LOCATION);
2120   gcc_assert (saved_line_table->m_reallocator);
2121   line_table->m_reallocator = saved_line_table->m_reallocator;
2122   gcc_assert (saved_line_table->m_round_alloc_size);
2123   line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
2124   line_table->default_range_bits = case_.m_default_range_bits;
2125   if (case_.m_base_location)
2126     {
2127       line_table->highest_location = case_.m_base_location;
2128       line_table->highest_line = case_.m_base_location;
2129     }
2130 }
2131
2132 /* Destructor.  Restore the old value of line_table.  */
2133
2134 line_table_test::~line_table_test ()
2135 {
2136   gcc_assert (saved_line_table != NULL);
2137   line_table = saved_line_table;
2138   saved_line_table = NULL;
2139 }
2140
2141 /* Verify basic operation of ordinary linemaps.  */
2142
2143 static void
2144 test_accessing_ordinary_linemaps (const line_table_case &case_)
2145 {
2146   line_table_test ltt (case_);
2147
2148   /* Build a simple linemap describing some locations. */
2149   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
2150
2151   linemap_line_start (line_table, 1, 100);
2152   location_t loc_a = linemap_position_for_column (line_table, 1);
2153   location_t loc_b = linemap_position_for_column (line_table, 23);
2154
2155   linemap_line_start (line_table, 2, 100);
2156   location_t loc_c = linemap_position_for_column (line_table, 1);
2157   location_t loc_d = linemap_position_for_column (line_table, 17);
2158
2159   /* Example of a very long line.  */
2160   linemap_line_start (line_table, 3, 2000);
2161   location_t loc_e = linemap_position_for_column (line_table, 700);
2162
2163   /* Transitioning back to a short line.  */
2164   linemap_line_start (line_table, 4, 0);
2165   location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
2166
2167   if (should_have_column_data_p (loc_back_to_short))
2168     {
2169       /* Verify that we switched to short lines in the linemap.  */
2170       line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
2171       ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
2172     }
2173
2174   /* Example of a line that will eventually be seen to be longer
2175      than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
2176      below that.  */
2177   linemap_line_start (line_table, 5, 2000);
2178
2179   location_t loc_start_of_very_long_line
2180     = linemap_position_for_column (line_table, 2000);
2181   location_t loc_too_wide
2182     = linemap_position_for_column (line_table, 4097);
2183   location_t loc_too_wide_2
2184     = linemap_position_for_column (line_table, 4098);
2185
2186   /* ...and back to a sane line length.  */
2187   linemap_line_start (line_table, 6, 100);
2188   location_t loc_sane_again = linemap_position_for_column (line_table, 10);
2189
2190   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
2191
2192   /* Multiple files.  */
2193   linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
2194   linemap_line_start (line_table, 1, 200);
2195   location_t loc_f = linemap_position_for_column (line_table, 150);
2196   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
2197
2198   /* Verify that we can recover the location info.  */
2199   assert_loceq ("foo.c", 1, 1, loc_a);
2200   assert_loceq ("foo.c", 1, 23, loc_b);
2201   assert_loceq ("foo.c", 2, 1, loc_c);
2202   assert_loceq ("foo.c", 2, 17, loc_d);
2203   assert_loceq ("foo.c", 3, 700, loc_e);
2204   assert_loceq ("foo.c", 4, 100, loc_back_to_short);
2205
2206   /* In the very wide line, the initial location should be fully tracked.  */
2207   assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
2208   /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
2209      be disabled.  */
2210   assert_loceq ("foo.c", 5, 0, loc_too_wide);
2211   assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
2212   /*...and column-tracking should be re-enabled for subsequent lines.  */
2213   assert_loceq ("foo.c", 6, 10, loc_sane_again);
2214
2215   assert_loceq ("bar.c", 1, 150, loc_f);
2216
2217   ASSERT_FALSE (is_location_from_builtin_token (loc_a));
2218   ASSERT_TRUE (pure_location_p (line_table, loc_a));
2219
2220   /* Verify using make_location to build a range, and extracting data
2221      back from it.  */
2222   location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
2223   ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
2224   ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
2225   source_range src_range = get_range_from_loc (line_table, range_c_b_d);
2226   ASSERT_EQ (loc_b, src_range.m_start);
2227   ASSERT_EQ (loc_d, src_range.m_finish);
2228 }
2229
2230 /* Verify various properties of UNKNOWN_LOCATION.  */
2231
2232 static void
2233 test_unknown_location ()
2234 {
2235   ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
2236   ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
2237   ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
2238 }
2239
2240 /* Verify various properties of BUILTINS_LOCATION.  */
2241
2242 static void
2243 test_builtins ()
2244 {
2245   assert_loceq (special_fname_builtin (), 0, 0, BUILTINS_LOCATION);
2246   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
2247 }
2248
2249 /* Regression test for make_location.
2250    Ensure that we use pure locations for the start/finish of the range,
2251    rather than storing a packed or ad-hoc range as the start/finish.  */
2252
2253 static void
2254 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
2255 {
2256   /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
2257      with C++ frontend.
2258      ....................0000000001111111111222.
2259      ....................1234567890123456789012.  */
2260   const char *content = "     r += !aaa == bbb;\n";
2261   temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
2262   line_table_test ltt (case_);
2263   linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
2264
2265   const location_t c11 = linemap_position_for_column (line_table, 11);
2266   const location_t c12 = linemap_position_for_column (line_table, 12);
2267   const location_t c13 = linemap_position_for_column (line_table, 13);
2268   const location_t c14 = linemap_position_for_column (line_table, 14);
2269   const location_t c21 = linemap_position_for_column (line_table, 21);
2270
2271   if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
2272     return;
2273
2274   /* Use column 13 for the caret location, arbitrarily, to verify that we
2275      handle start != caret.  */
2276   const location_t aaa = make_location (c13, c12, c14);
2277   ASSERT_EQ (c13, get_pure_location (aaa));
2278   ASSERT_EQ (c12, get_start (aaa));
2279   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
2280   ASSERT_EQ (c14, get_finish (aaa));
2281   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
2282
2283   /* Make a location using a location with a range as the start-point.  */
2284   const location_t not_aaa = make_location (c11, aaa, c14);
2285   ASSERT_EQ (c11, get_pure_location (not_aaa));
2286   /* It should use the start location of the range, not store the range
2287      itself.  */
2288   ASSERT_EQ (c12, get_start (not_aaa));
2289   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
2290   ASSERT_EQ (c14, get_finish (not_aaa));
2291   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
2292
2293   /* Similarly, make a location with a range as the end-point.  */
2294   const location_t aaa_eq_bbb = make_location (c12, c12, c21);
2295   ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
2296   ASSERT_EQ (c12, get_start (aaa_eq_bbb));
2297   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
2298   ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
2299   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
2300   const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
2301   /* It should use the finish location of the range, not store the range
2302      itself.  */
2303   ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
2304   ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
2305   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
2306   ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
2307   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
2308 }
2309
2310 /* Verify reading of input files (e.g. for caret-based diagnostics).  */
2311
2312 static void
2313 test_reading_source_line ()
2314 {
2315   /* Create a tempfile and write some text to it.  */
2316   temp_source_file tmp (SELFTEST_LOCATION, ".txt",
2317                         "01234567890123456789\n"
2318                         "This is the test text\n"
2319                         "This is the 3rd line");
2320
2321   /* Read back a specific line from the tempfile.  */
2322   char_span source_line = location_get_source_line (tmp.get_filename (), 3);
2323   ASSERT_TRUE (source_line);
2324   ASSERT_TRUE (source_line.get_buffer () != NULL);
2325   ASSERT_EQ (20, source_line.length ());
2326   ASSERT_TRUE (!strncmp ("This is the 3rd line",
2327                          source_line.get_buffer (), source_line.length ()));
2328
2329   source_line = location_get_source_line (tmp.get_filename (), 2);
2330   ASSERT_TRUE (source_line);
2331   ASSERT_TRUE (source_line.get_buffer () != NULL);
2332   ASSERT_EQ (21, source_line.length ());
2333   ASSERT_TRUE (!strncmp ("This is the test text",
2334                          source_line.get_buffer (), source_line.length ()));
2335
2336   source_line = location_get_source_line (tmp.get_filename (), 4);
2337   ASSERT_FALSE (source_line);
2338   ASSERT_TRUE (source_line.get_buffer () == NULL);
2339 }
2340
2341 /* Tests of lexing.  */
2342
/* Verify that calling cpp_token_as_text on token TOK from PARSER
   gives a string equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)               \
  SELFTEST_BEGIN_STMT                                                     \
    const unsigned char *tok_text_ = cpp_token_as_text ((PARSER), (TOK)); \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)tok_text_);              \
  SELFTEST_END_STMT
2351
2352 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
2353    and ranges from EXP_START_COL to EXP_FINISH_COL.
2354    Use LOC as the effective location of the selftest.  */
2355
2356 static void
2357 assert_token_loc_eq (const location &loc,
2358                      const cpp_token *tok,
2359                      const char *exp_filename, int exp_linenum,
2360                      int exp_start_col, int exp_finish_col)
2361 {
2362   location_t tok_loc = tok->src_loc;
2363   ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
2364   ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
2365
2366   /* If location_t values are sufficiently high, then column numbers
2367      will be unavailable.  */
2368   if (!should_have_column_data_p (tok_loc))
2369     return;
2370
2371   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2372   source_range tok_range = get_range_from_loc (line_table, tok_loc);
2373   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2374   ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
2375 }
2376
/* Call assert_token_loc_eq to verify TOK->src_loc, passing
   SELFTEST_LOCATION as the effective location of the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,     \
                            EXP_START_COL, EXP_FINISH_COL)      \
  assert_token_loc_eq (SELFTEST_LOCATION,                       \
                       (TOK),                                   \
                       (EXP_FILENAME),                          \
                       (EXP_LINENUM),                           \
                       (EXP_START_COL),                         \
                       (EXP_FINISH_COL))
2384
2385 /* Test of lexing a file using libcpp, verifying tokens and their
2386    location information.  */
2387
2388 static void
2389 test_lexer (const line_table_case &case_)
2390 {
2391   /* Create a tempfile and write some text to it.  */
2392   const char *content =
2393     /*00000000011111111112222222222333333.3333444444444.455555555556
2394       12345678901234567890123456789012345.6789012345678.901234567890.  */
2395     ("test_name /* c-style comment */\n"
2396      "                                  \"test literal\"\n"
2397      " // test c++-style comment\n"
2398      "   42\n");
2399   temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2400
2401   line_table_test ltt (case_);
2402
2403   cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2404
2405   const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2406   ASSERT_NE (fname, NULL);
2407
2408   /* Verify that we get the expected tokens back, with the correct
2409      location information.  */
2410
2411   location_t loc;
2412   const cpp_token *tok;
2413   tok = cpp_get_token_with_location (parser, &loc);
2414   ASSERT_NE (tok, NULL);
2415   ASSERT_EQ (tok->type, CPP_NAME);
2416   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2417   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2418
2419   tok = cpp_get_token_with_location (parser, &loc);
2420   ASSERT_NE (tok, NULL);
2421   ASSERT_EQ (tok->type, CPP_STRING);
2422   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2423   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2424
2425   tok = cpp_get_token_with_location (parser, &loc);
2426   ASSERT_NE (tok, NULL);
2427   ASSERT_EQ (tok->type, CPP_NUMBER);
2428   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2429   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2430
2431   tok = cpp_get_token_with_location (parser, &loc);
2432   ASSERT_NE (tok, NULL);
2433   ASSERT_EQ (tok->type, CPP_EOF);
2434
2435   cpp_finish (parser, NULL);
2436   cpp_destroy (parser);
2437 }
2438
/* Forward decls.  */

class lexer_test;
class lexer_test_options;

/* A class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor.

   This is a polymorphic base class, so it has a virtual destructor:
   destroying a subclass instance through a lexer_test_options pointer
   is then well-defined.  */

class lexer_test_options
{
 public:
  virtual ~lexer_test_options () {}

  /* Apply these options to the given lexer_test.  */
  virtual void apply (lexer_test &) = 0;
};
2452
2453 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2454    in its dtor.
2455
2456    This is needed by struct lexer_test to ensure that the cleanup of the
2457    cpp_reader happens *after* the cleanup of the temp_source_file.  */
2458
2459 class cpp_reader_ptr
2460 {
2461  public:
2462   cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2463
2464   ~cpp_reader_ptr ()
2465   {
2466     cpp_finish (m_ptr, NULL);
2467     cpp_destroy (m_ptr);
2468   }
2469
2470   operator cpp_reader * () const { return m_ptr; }
2471
2472  private:
2473   cpp_reader *m_ptr;
2474 };
2475
2476 /* A struct for writing lexer tests.  */
2477
2478 class lexer_test
2479 {
2480 public:
2481   lexer_test (const line_table_case &case_, const char *content,
2482               lexer_test_options *options);
2483   ~lexer_test ();
2484
2485   const cpp_token *get_token ();
2486
2487   /* The ordering of these fields matters.
2488      The line_table_test must be first, since the cpp_reader_ptr
2489      uses it.
2490      The cpp_reader must be cleaned up *after* the temp_source_file
2491      since the filenames in input.cc's input cache are owned by the
2492      cpp_reader; in particular, when ~temp_source_file evicts the
2493      filename the filenames must still be alive.  */
2494   line_table_test m_ltt;
2495   cpp_reader_ptr m_parser;
2496   temp_source_file m_tempfile;
2497   string_concat_db m_concats;
2498   bool m_implicitly_expect_EOF;
2499 };
2500
2501 /* Use an EBCDIC encoding for the execution charset, specifically
2502    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2503
2504    This exercises iconv integration within libcpp.
2505    Not every build of iconv supports the given charset,
2506    so we need to flag this error and handle it gracefully.  */
2507
2508 class ebcdic_execution_charset : public lexer_test_options
2509 {
2510  public:
2511   ebcdic_execution_charset () : m_num_iconv_errors (0)
2512     {
2513       gcc_assert (s_singleton == NULL);
2514       s_singleton = this;
2515     }
2516   ~ebcdic_execution_charset ()
2517     {
2518       gcc_assert (s_singleton == this);
2519       s_singleton = NULL;
2520     }
2521
2522   void apply (lexer_test &test) final override
2523   {
2524     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2525     cpp_opts->narrow_charset = "IBM1047";
2526
2527     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2528     callbacks->diagnostic = on_diagnostic;
2529   }
2530
2531   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2532                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2533                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2534                              rich_location *richloc ATTRIBUTE_UNUSED,
2535                              const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2536     ATTRIBUTE_FPTR_PRINTF(5,0)
2537   {
2538     gcc_assert (s_singleton);
2539     /* Avoid exgettext from picking this up, it is translated in libcpp.  */
2540     const char *msg = "conversion from %s to %s not supported by iconv";
2541 #ifdef ENABLE_NLS
2542     msg = dgettext ("cpplib", msg);
2543 #endif
2544     /* Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc
2545        when the local iconv build doesn't support the conversion.  */
2546     if (strcmp (msgid, msg) == 0)
2547       {
2548         s_singleton->m_num_iconv_errors++;
2549         return true;
2550       }
2551
2552     /* Otherwise, we have an unexpected error.  */
2553     abort ();
2554   }
2555
2556   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2557
2558  private:
2559   static ebcdic_execution_charset *s_singleton;
2560   int m_num_iconv_errors;
2561 };
2562
2563 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2564
2565 /* A lexer_test_options subclass that records a list of diagnostic
2566    messages emitted by the lexer.  */
2567
2568 class lexer_diagnostic_sink : public lexer_test_options
2569 {
2570  public:
2571   lexer_diagnostic_sink ()
2572   {
2573     gcc_assert (s_singleton == NULL);
2574     s_singleton = this;
2575   }
2576   ~lexer_diagnostic_sink ()
2577   {
2578     gcc_assert (s_singleton == this);
2579     s_singleton = NULL;
2580
2581     int i;
2582     char *str;
2583     FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2584       free (str);
2585   }
2586
2587   void apply (lexer_test &test) final override
2588   {
2589     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2590     callbacks->diagnostic = on_diagnostic;
2591   }
2592
2593   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2594                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2595                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2596                              rich_location *richloc ATTRIBUTE_UNUSED,
2597                              const char *msgid, va_list *ap)
2598     ATTRIBUTE_FPTR_PRINTF(5,0)
2599   {
2600     char *msg = xvasprintf (msgid, *ap);
2601     s_singleton->m_diagnostics.safe_push (msg);
2602     return true;
2603   }
2604
2605   auto_vec<char *> m_diagnostics;
2606
2607  private:
2608   static lexer_diagnostic_sink *s_singleton;
2609 };
2610
2611 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2612
2613 /* Constructor.  Override line_table with a new instance based on CASE_,
2614    and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
2615    start parsing the tempfile.  */
2616
2617 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2618                         lexer_test_options *options)
2619 : m_ltt (case_),
2620   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2621   /* Create a tempfile and write the text to it.  */
2622   m_tempfile (SELFTEST_LOCATION, ".c", content),
2623   m_concats (),
2624   m_implicitly_expect_EOF (true)
2625 {
2626   if (options)
2627     options->apply (*this);
2628
2629   cpp_init_iconv (m_parser);
2630
2631   /* Parse the file.  */
2632   const char *fname = cpp_read_main_file (m_parser,
2633                                           m_tempfile.get_filename ());
2634   ASSERT_NE (fname, NULL);
2635 }
2636
2637 /* Destructor.  By default, verify that the next token in m_parser is EOF.  */
2638
2639 lexer_test::~lexer_test ()
2640 {
2641   location_t loc;
2642   const cpp_token *tok;
2643
2644   if (m_implicitly_expect_EOF)
2645     {
2646       tok = cpp_get_token_with_location (m_parser, &loc);
2647       ASSERT_NE (tok, NULL);
2648       ASSERT_EQ (tok->type, CPP_EOF);
2649     }
2650 }
2651
2652 /* Get the next token from m_parser.  */
2653
2654 const cpp_token *
2655 lexer_test::get_token ()
2656 {
2657   location_t loc;
2658   const cpp_token *tok;
2659
2660   tok = cpp_get_token_with_location (m_parser, &loc);
2661   ASSERT_NE (tok, NULL);
2662   return tok;
2663 }
2664
2665 /* Verify that locations within string literals are correctly handled.  */
2666
2667 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2668    using the string concatenation database for TEST.
2669
2670    Assert that the character at index IDX is on EXPECTED_LINE,
2671    and that it begins at column EXPECTED_START_COL and ends at
2672    EXPECTED_FINISH_COL (unless the locations are beyond
2673    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2674    columns).  */
2675
2676 static void
2677 assert_char_at_range (const location &loc,
2678                       lexer_test& test,
2679                       location_t strloc, enum cpp_ttype type, int idx,
2680                       int expected_line, int expected_start_col,
2681                       int expected_finish_col)
2682 {
2683   cpp_reader *pfile = test.m_parser;
2684   string_concat_db *concats = &test.m_concats;
2685
2686   source_range actual_range = source_range();
2687   const char *err
2688     = get_source_range_for_char (pfile, concats, strloc, type, idx,
2689                                  &actual_range);
2690   if (should_have_column_data_p (strloc))
2691     ASSERT_EQ_AT (loc, NULL, err);
2692   else
2693     {
2694       ASSERT_STREQ_AT (loc,
2695                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2696                        err);
2697       return;
2698     }
2699
2700   int actual_start_line = LOCATION_LINE (actual_range.m_start);
2701   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2702   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2703   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2704
2705   if (should_have_column_data_p (actual_range.m_start))
2706     {
2707       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2708       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2709     }
2710   if (should_have_column_data_p (actual_range.m_finish))
2711     {
2712       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2713       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2714     }
2715 }
2716
2717 /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2718    the effective location of any errors.  */
2719
2720 #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2721                              EXPECTED_START_COL, EXPECTED_FINISH_COL)   \
2722   assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2723                         (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2724                         (EXPECTED_FINISH_COL))
2725
2726 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2727    using the string concatenation database for TEST.
2728
2729    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES.  */
2730
2731 static void
2732 assert_num_substring_ranges (const location &loc,
2733                              lexer_test& test,
2734                              location_t strloc,
2735                              enum cpp_ttype type,
2736                              int expected_num_ranges)
2737 {
2738   cpp_reader *pfile = test.m_parser;
2739   string_concat_db *concats = &test.m_concats;
2740
2741   int actual_num_ranges = -1;
2742   const char *err
2743     = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2744                                            &actual_num_ranges);
2745   if (should_have_column_data_p (strloc))
2746     ASSERT_EQ_AT (loc, NULL, err);
2747   else
2748     {
2749       ASSERT_STREQ_AT (loc,
2750                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2751                        err);
2752       return;
2753     }
2754   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2755 }
2756
2757 /* Macro for calling assert_num_substring_ranges, supplying
2758    SELFTEST_LOCATION for the effective location of any errors.  */
2759
2760 #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2761                                     EXPECTED_NUM_RANGES)                \
2762   assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2763                                (TYPE), (EXPECTED_NUM_RANGES))
2764
2765
2766 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2767    returns an error (using the string concatenation database for TEST).  */
2768
2769 static void
2770 assert_has_no_substring_ranges (const location &loc,
2771                                 lexer_test& test,
2772                                 location_t strloc,
2773                                 enum cpp_ttype type,
2774                                 const char *expected_err)
2775 {
2776   cpp_reader *pfile = test.m_parser;
2777   string_concat_db *concats = &test.m_concats;
2778   cpp_substring_ranges ranges;
2779   const char *actual_err
2780     = get_substring_ranges_for_loc (pfile, concats, strloc,
2781                                     type, ranges);
2782   if (should_have_column_data_p (strloc))
2783     ASSERT_STREQ_AT (loc, expected_err, actual_err);
2784   else
2785     ASSERT_STREQ_AT (loc,
2786                      "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2787                      actual_err);
2788 }
2789
2790 #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)    \
2791     assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2792                                     (STRLOC), (TYPE), (ERR))
2793
2794 /* Lex a simple string literal.  Verify the substring location data, before
2795    and after running cpp_interpret_string on it.  */
2796
2797 static void
2798 test_lexer_string_locations_simple (const line_table_case &case_)
2799 {
2800   /* Digits 0-9 (with 0 at column 10), the simple way.
2801      ....................000000000.11111111112.2222222223333333333
2802      ....................123456789.01234567890.1234567890123456789
2803      We add a trailing comment to ensure that we correctly locate
2804      the end of the string literal token.  */
2805   const char *content = "        \"0123456789\" /* not a string */\n";
2806   lexer_test test (case_, content, NULL);
2807
2808   /* Verify that we get the expected token back, with the correct
2809      location information.  */
2810   const cpp_token *tok = test.get_token ();
2811   ASSERT_EQ (tok->type, CPP_STRING);
2812   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2813   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2814
2815   /* At this point in lexing, the quote characters are treated as part of
2816      the string (they are stripped off by cpp_interpret_string).  */
2817
2818   ASSERT_EQ (tok->val.str.len, 12);
2819
2820   /* Verify that cpp_interpret_string works.  */
2821   cpp_string dst_string;
2822   const enum cpp_ttype type = CPP_STRING;
2823   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2824                                       &dst_string, type);
2825   ASSERT_TRUE (result);
2826   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2827   free (const_cast <unsigned char *> (dst_string.text));
2828
2829   /* Verify ranges of individual characters.  This no longer includes the
2830      opening quote, but does include the closing quote.  */
2831   for (int i = 0; i <= 10; i++)
2832     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2833                           10 + i, 10 + i);
2834
2835   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2836 }
2837
2838 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2839    encoding.  */
2840
2841 static void
2842 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2843 {
2844   /* EBCDIC support requires iconv.  */
2845   if (!HAVE_ICONV)
2846     return;
2847
2848   /* Digits 0-9 (with 0 at column 10), the simple way.
2849      ....................000000000.11111111112.2222222223333333333
2850      ....................123456789.01234567890.1234567890123456789
2851      We add a trailing comment to ensure that we correctly locate
2852      the end of the string literal token.  */
2853   const char *content = "        \"0123456789\" /* not a string */\n";
2854   ebcdic_execution_charset use_ebcdic;
2855   lexer_test test (case_, content, &use_ebcdic);
2856
2857   /* Verify that we get the expected token back, with the correct
2858      location information.  */
2859   const cpp_token *tok = test.get_token ();
2860   ASSERT_EQ (tok->type, CPP_STRING);
2861   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2862   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2863
2864   /* At this point in lexing, the quote characters are treated as part of
2865      the string (they are stripped off by cpp_interpret_string).  */
2866
2867   ASSERT_EQ (tok->val.str.len, 12);
2868
2869   /* The remainder of the test requires an iconv implementation that
2870      can convert from UTF-8 to the EBCDIC encoding requested above.  */
2871   if (use_ebcdic.iconv_errors_occurred_p ())
2872     return;
2873
2874   /* Verify that cpp_interpret_string works.  */
2875   cpp_string dst_string;
2876   const enum cpp_ttype type = CPP_STRING;
2877   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2878                                       &dst_string, type);
2879   ASSERT_TRUE (result);
2880   /* We should now have EBCDIC-encoded text, specifically
2881      IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2882      The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
2883   ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2884                 (const char *)dst_string.text);
2885   free (const_cast <unsigned char *> (dst_string.text));
2886
2887   /* Verify that we don't attempt to record substring location information
2888      for such cases.  */
2889   ASSERT_HAS_NO_SUBSTRING_RANGES
2890     (test, tok->src_loc, type,
2891      "execution character set != source character set");
2892 }
2893
2894 /* Lex a string literal containing a hex-escaped character.
2895    Verify the substring location data, before and after running
2896    cpp_interpret_string on it.  */
2897
2898 static void
2899 test_lexer_string_locations_hex (const line_table_case &case_)
2900 {
2901   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2902      and with a space in place of digit 6, to terminate the escaped
2903      hex code.
2904      ....................000000000.111111.11112222.
2905      ....................123456789.012345.67890123.  */
2906   const char *content = "        \"01234\\x35 789\"\n";
2907   lexer_test test (case_, content, NULL);
2908
2909   /* Verify that we get the expected token back, with the correct
2910      location information.  */
2911   const cpp_token *tok = test.get_token ();
2912   ASSERT_EQ (tok->type, CPP_STRING);
2913   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2914   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2915
2916   /* At this point in lexing, the quote characters are treated as part of
2917      the string (they are stripped off by cpp_interpret_string).  */
2918   ASSERT_EQ (tok->val.str.len, 15);
2919
2920   /* Verify that cpp_interpret_string works.  */
2921   cpp_string dst_string;
2922   const enum cpp_ttype type = CPP_STRING;
2923   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2924                                       &dst_string, type);
2925   ASSERT_TRUE (result);
2926   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2927   free (const_cast <unsigned char *> (dst_string.text));
2928
2929   /* Verify ranges of individual characters.  This no longer includes the
2930      opening quote, but does include the closing quote.  */
2931   for (int i = 0; i <= 4; i++)
2932     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2933   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2934   for (int i = 6; i <= 10; i++)
2935     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2936
2937   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2938 }
2939
2940 /* Lex a string literal containing an octal-escaped character.
2941    Verify the substring location data after running cpp_interpret_string
2942    on it.  */
2943
2944 static void
2945 test_lexer_string_locations_oct (const line_table_case &case_)
2946 {
2947   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2948      and with a space in place of digit 6, to terminate the escaped
2949      octal code.
2950      ....................000000000.111111.11112222.2222223333333333444
2951      ....................123456789.012345.67890123.4567890123456789012  */
2952   const char *content = "        \"01234\\065 789\" /* not a string */\n";
2953   lexer_test test (case_, content, NULL);
2954
2955   /* Verify that we get the expected token back, with the correct
2956      location information.  */
2957   const cpp_token *tok = test.get_token ();
2958   ASSERT_EQ (tok->type, CPP_STRING);
2959   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2960
2961   /* Verify that cpp_interpret_string works.  */
2962   cpp_string dst_string;
2963   const enum cpp_ttype type = CPP_STRING;
2964   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2965                                       &dst_string, type);
2966   ASSERT_TRUE (result);
2967   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2968   free (const_cast <unsigned char *> (dst_string.text));
2969
2970   /* Verify ranges of individual characters.  This no longer includes the
2971      opening quote, but does include the closing quote.  */
2972   for (int i = 0; i < 5; i++)
2973     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2974   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2975   for (int i = 6; i <= 10; i++)
2976     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2977
2978   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2979 }
2980
2981 /* Test of string literal containing letter escapes.  */
2982
2983 static void
2984 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2985 {
2986   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2987      .....................000000000.1.11111.1.1.11222.22222223333333
2988      .....................123456789.0.12345.6.7.89012.34567890123456.  */
2989   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2990   lexer_test test (case_, content, NULL);
2991
2992   /* Verify that we get the expected tokens back.  */
2993   const cpp_token *tok = test.get_token ();
2994   ASSERT_EQ (tok->type, CPP_STRING);
2995   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2996
2997   /* Verify ranges of individual characters. */
2998   /* "\t".  */
2999   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3000                         0, 1, 10, 11);
3001   /* "foo". */
3002   for (int i = 1; i <= 3; i++)
3003     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3004                           i, 1, 11 + i, 11 + i);
3005   /* "\\" and "\n".  */
3006   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3007                         4, 1, 15, 16);
3008   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3009                         5, 1, 17, 18);
3010
3011   /* "bar" and closing quote for nul-terminator.  */
3012   for (int i = 6; i <= 9; i++)
3013     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3014                           i, 1, 13 + i, 13 + i);
3015
3016   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
3017 }
3018
3019 /* Another test of a string literal containing a letter escape.
3020    Based on string seen in
3021      printf ("%-%\n");
3022    in gcc.dg/format/c90-printf-1.c.  */
3023
3024 static void
3025 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
3026 {
3027   /* .....................000000000.1111.11.1111.22222222223.
3028      .....................123456789.0123.45.6789.01234567890.  */
3029   const char *content = ("        \"%-%\\n\" /* non-str */\n");
3030   lexer_test test (case_, content, NULL);
3031
3032   /* Verify that we get the expected tokens back.  */
3033   const cpp_token *tok = test.get_token ();
3034   ASSERT_EQ (tok->type, CPP_STRING);
3035   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
3036
3037   /* Verify ranges of individual characters. */
3038   /* "%-%".  */
3039   for (int i = 0; i < 3; i++)
3040     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3041                           i, 1, 10 + i, 10 + i);
3042   /* "\n".  */
3043   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3044                         3, 1, 13, 14);
3045
3046   /* Closing quote for nul-terminator.  */
3047   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3048                         4, 1, 15, 15);
3049
3050   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
3051 }
3052
3053 /* Lex a string literal containing UCN 4 characters.
3054    Verify the substring location data after running cpp_interpret_string
3055    on it.  */
3056
3057 static void
3058 test_lexer_string_locations_ucn4 (const line_table_case &case_)
3059 {
3060   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
3061      as UCN 4.
3062      ....................000000000.111111.111122.222222223.33333333344444
3063      ....................123456789.012345.678901.234567890.12345678901234  */
3064   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
3065   lexer_test test (case_, content, NULL);
3066
3067   /* Verify that we get the expected token back, with the correct
3068      location information.  */
3069   const cpp_token *tok = test.get_token ();
3070   ASSERT_EQ (tok->type, CPP_STRING);
3071   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
3072
3073   /* Verify that cpp_interpret_string works.
3074      The string should be encoded in the execution character
3075      set.  Assuming that is UTF-8, we should have the following:
3076      -----------  ----  -----  -------  ----------------
3077      Byte offset  Byte  Octal  Unicode  Source Column(s)
3078      -----------  ----  -----  -------  ----------------
3079      0            0x30         '0'      10
3080      1            0x31         '1'      11
3081      2            0x32         '2'      12
3082      3            0x33         '3'      13
3083      4            0x34         '4'      14
3084      5            0xE2  \342   U+2174   15-20
3085      6            0x85  \205    (cont)  15-20
3086      7            0xB4  \264    (cont)  15-20
3087      8            0xE2  \342   U+2175   21-26
3088      9            0x85  \205    (cont)  21-26
3089      10           0xB5  \265    (cont)  21-26
3090      11           0x37         '7'      27
3091      12           0x38         '8'      28
3092      13           0x39         '9'      29
3093      14           0x00                  30 (closing quote)
3094      -----------  ----  -----  -------  ---------------.  */
3095
3096   cpp_string dst_string;
3097   const enum cpp_ttype type = CPP_STRING;
3098   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3099                                       &dst_string, type);
3100   ASSERT_TRUE (result);
3101   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3102                 (const char *)dst_string.text);
3103   free (const_cast <unsigned char *> (dst_string.text));
3104
3105   /* Verify ranges of individual characters.  This no longer includes the
3106      opening quote, but does include the closing quote.
3107      '01234'.  */
3108   for (int i = 0; i <= 4; i++)
3109     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3110   /* U+2174.  */
3111   for (int i = 5; i <= 7; i++)
3112     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
3113   /* U+2175.  */
3114   for (int i = 8; i <= 10; i++)
3115     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
3116   /* '789' and nul terminator  */
3117   for (int i = 11; i <= 14; i++)
3118     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
3119
3120   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
3121 }
3122
3123 /* Lex a string literal containing UCN 8 characters.
3124    Verify the substring location data after running cpp_interpret_string
3125    on it.  */
3126
3127 static void
3128 test_lexer_string_locations_ucn8 (const line_table_case &case_)
3129 {
3130   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
3131      ....................000000000.111111.1111222222.2222333333333.344444
3132      ....................123456789.012345.6789012345.6789012345678.901234  */
3133   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
3134   lexer_test test (case_, content, NULL);
3135
3136   /* Verify that we get the expected token back, with the correct
3137      location information.  */
3138   const cpp_token *tok = test.get_token ();
3139   ASSERT_EQ (tok->type, CPP_STRING);
3140   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
3141                            "\"01234\\U00002174\\U00002175789\"");
3142
3143   /* Verify that cpp_interpret_string works.
3144      The UTF-8 encoding of the string is identical to that from
3145      the ucn4 testcase above; the only difference is the column
3146      locations.  */
3147   cpp_string dst_string;
3148   const enum cpp_ttype type = CPP_STRING;
3149   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3150                                       &dst_string, type);
3151   ASSERT_TRUE (result);
3152   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3153                 (const char *)dst_string.text);
3154   free (const_cast <unsigned char *> (dst_string.text));
3155
3156   /* Verify ranges of individual characters.  This no longer includes the
3157      opening quote, but does include the closing quote.
3158      '01234'.  */
3159   for (int i = 0; i <= 4; i++)
3160     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3161   /* U+2174.  */
3162   for (int i = 5; i <= 7; i++)
3163     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
3164   /* U+2175.  */
3165   for (int i = 8; i <= 10; i++)
3166     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
3167   /* '789' at columns 35-37  */
3168   for (int i = 11; i <= 13; i++)
3169     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
3170   /* Closing quote/nul-terminator at column 38.  */
3171   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
3172
3173   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
3174 }
3175
/* Read the 32-bit big-endian value stored at PTR_BE_VALUE, returning
   it in host endianness.  The bytes are read individually, most
   significant first, so the result doesn't depend on the host's byte
   order.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes
    = reinterpret_cast <const unsigned char *> (ptr_be_value);
  uint32_t host_value = 0;
  for (int i = 0; i < 4; i++)
    host_value = (host_value << 8) | bytes[i];
  return host_value;
}
3187
3188 /* Lex a wide string literal and verify that attempts to read substring
3189    location data from it fail gracefully.  */
3190
3191 static void
3192 test_lexer_string_locations_wide_string (const line_table_case &case_)
3193 {
3194   /* Digits 0-9.
3195      ....................000000000.11111111112.22222222233333
3196      ....................123456789.01234567890.12345678901234  */
3197   const char *content = "       L\"0123456789\" /* non-str */\n";
3198   lexer_test test (case_, content, NULL);
3199
3200   /* Verify that we get the expected token back, with the correct
3201      location information.  */
3202   const cpp_token *tok = test.get_token ();
3203   ASSERT_EQ (tok->type, CPP_WSTRING);
3204   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
3205
3206   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
3207   cpp_string dst_string;
3208   const enum cpp_ttype type = CPP_WSTRING;
3209   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3210                                       &dst_string, type);
3211   ASSERT_TRUE (result);
3212   /* The cpp_reader defaults to big-endian with
3213      CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
3214      now be encoded as UTF-32BE.  */
3215   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3216   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3217   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3218   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3219   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3220   free (const_cast <unsigned char *> (dst_string.text));
3221
3222   /* We don't yet support generating substring location information
3223      for L"" strings.  */
3224   ASSERT_HAS_NO_SUBSTRING_RANGES
3225     (test, tok->src_loc, type,
3226      "execution character set != source character set");
3227 }
3228
/* Read the 16-bit big-endian value stored at PTR_BE_VALUE, returning
   it in host endianness.  The bytes are read individually, most
   significant first, so the result doesn't depend on the host's byte
   order.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes
    = reinterpret_cast <const unsigned char *> (ptr_be_value);
  const unsigned int hi = bytes[0];
  const unsigned int lo = bytes[1];
  return static_cast <uint16_t> ((hi << 8) | lo);
}
3237
3238 /* Lex a u"" string literal and verify that attempts to read substring
3239    location data from it fail gracefully.  */
3240
3241 static void
3242 test_lexer_string_locations_string16 (const line_table_case &case_)
3243 {
3244   /* Digits 0-9.
3245      ....................000000000.11111111112.22222222233333
3246      ....................123456789.01234567890.12345678901234  */
3247   const char *content = "       u\"0123456789\" /* non-str */\n";
3248   lexer_test test (case_, content, NULL);
3249
3250   /* Verify that we get the expected token back, with the correct
3251      location information.  */
3252   const cpp_token *tok = test.get_token ();
3253   ASSERT_EQ (tok->type, CPP_STRING16);
3254   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
3255
3256   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
3257   cpp_string dst_string;
3258   const enum cpp_ttype type = CPP_STRING16;
3259   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3260                                       &dst_string, type);
3261   ASSERT_TRUE (result);
3262
3263   /* The cpp_reader defaults to big-endian, so dst_string should
3264      now be encoded as UTF-16BE.  */
3265   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
3266   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
3267   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
3268   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
3269   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
3270   free (const_cast <unsigned char *> (dst_string.text));
3271
3272   /* We don't yet support generating substring location information
3273      for L"" strings.  */
3274   ASSERT_HAS_NO_SUBSTRING_RANGES
3275     (test, tok->src_loc, type,
3276      "execution character set != source character set");
3277 }
3278
3279 /* Lex a U"" string literal and verify that attempts to read substring
3280    location data from it fail gracefully.  */
3281
3282 static void
3283 test_lexer_string_locations_string32 (const line_table_case &case_)
3284 {
3285   /* Digits 0-9.
3286      ....................000000000.11111111112.22222222233333
3287      ....................123456789.01234567890.12345678901234  */
3288   const char *content = "       U\"0123456789\" /* non-str */\n";
3289   lexer_test test (case_, content, NULL);
3290
3291   /* Verify that we get the expected token back, with the correct
3292      location information.  */
3293   const cpp_token *tok = test.get_token ();
3294   ASSERT_EQ (tok->type, CPP_STRING32);
3295   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
3296
3297   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
3298   cpp_string dst_string;
3299   const enum cpp_ttype type = CPP_STRING32;
3300   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3301                                       &dst_string, type);
3302   ASSERT_TRUE (result);
3303
3304   /* The cpp_reader defaults to big-endian, so dst_string should
3305      now be encoded as UTF-32BE.  */
3306   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3307   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3308   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3309   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3310   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3311   free (const_cast <unsigned char *> (dst_string.text));
3312
3313   /* We don't yet support generating substring location information
3314      for L"" strings.  */
3315   ASSERT_HAS_NO_SUBSTRING_RANGES
3316     (test, tok->src_loc, type,
3317      "execution character set != source character set");
3318 }
3319
3320 /* Lex a u8-string literal.
3321    Verify the substring location data after running cpp_interpret_string
3322    on it.  */
3323
3324 static void
3325 test_lexer_string_locations_u8 (const line_table_case &case_)
3326 {
3327   /* Digits 0-9.
3328      ....................000000000.11111111112.22222222233333
3329      ....................123456789.01234567890.12345678901234  */
3330   const char *content = "      u8\"0123456789\" /* non-str */\n";
3331   lexer_test test (case_, content, NULL);
3332
3333   /* Verify that we get the expected token back, with the correct
3334      location information.  */
3335   const cpp_token *tok = test.get_token ();
3336   ASSERT_EQ (tok->type, CPP_UTF8STRING);
3337   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
3338
3339   /* Verify that cpp_interpret_string works.  */
3340   cpp_string dst_string;
3341   const enum cpp_ttype type = CPP_STRING;
3342   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3343                                       &dst_string, type);
3344   ASSERT_TRUE (result);
3345   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3346   free (const_cast <unsigned char *> (dst_string.text));
3347
3348   /* Verify ranges of individual characters.  This no longer includes the
3349      opening quote, but does include the closing quote.  */
3350   for (int i = 0; i <= 10; i++)
3351     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3352 }
3353
3354 /* Lex a string literal containing UTF-8 source characters.
3355    Verify the substring location data after running cpp_interpret_string
3356    on it.  */
3357
3358 static void
3359 test_lexer_string_locations_utf8_source (const line_table_case &case_)
3360 {
3361  /* This string literal is written out to the source file as UTF-8,
3362     and is of the form "before mojibake after", where "mojibake"
3363     is written as the following four unicode code points:
3364        U+6587 CJK UNIFIED IDEOGRAPH-6587
3365        U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3366        U+5316 CJK UNIFIED IDEOGRAPH-5316
3367        U+3051 HIRAGANA LETTER KE.
3368      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3369      "before" and "after" are 1 byte per unicode character.
3370
3371      The numbering shown are "columns", which are *byte* numbers within
3372      the line, rather than unicode character numbers.
3373
3374      .................... 000000000.1111111.
3375      .................... 123456789.0123456.  */
3376   const char *content = ("        \"before "
3377                          /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3378                               UTF-8: 0xE6 0x96 0x87
3379                               C octal escaped UTF-8: \346\226\207
3380                             "column" numbers: 17-19.  */
3381                          "\346\226\207"
3382
3383                          /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3384                               UTF-8: 0xE5 0xAD 0x97
3385                               C octal escaped UTF-8: \345\255\227
3386                             "column" numbers: 20-22.  */
3387                          "\345\255\227"
3388
3389                          /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3390                               UTF-8: 0xE5 0x8C 0x96
3391                               C octal escaped UTF-8: \345\214\226
3392                             "column" numbers: 23-25.  */
3393                          "\345\214\226"
3394
3395                          /* U+3051 HIRAGANA LETTER KE
3396                               UTF-8: 0xE3 0x81 0x91
3397                               C octal escaped UTF-8: \343\201\221
3398                             "column" numbers: 26-28.  */
3399                          "\343\201\221"
3400
3401                          /* column numbers 29 onwards
3402                           2333333.33334444444444
3403                           9012345.67890123456789. */
3404                          " after\" /* non-str */\n");
3405   lexer_test test (case_, content, NULL);
3406
3407   /* Verify that we get the expected token back, with the correct
3408      location information.  */
3409   const cpp_token *tok = test.get_token ();
3410   ASSERT_EQ (tok->type, CPP_STRING);
3411   ASSERT_TOKEN_AS_TEXT_EQ
3412     (test.m_parser, tok,
3413      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3414
3415   /* Verify that cpp_interpret_string works.  */
3416   cpp_string dst_string;
3417   const enum cpp_ttype type = CPP_STRING;
3418   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3419                                       &dst_string, type);
3420   ASSERT_TRUE (result);
3421   ASSERT_STREQ
3422     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3423      (const char *)dst_string.text);
3424   free (const_cast <unsigned char *> (dst_string.text));
3425
3426   /* Verify ranges of individual characters.  This no longer includes the
3427      opening quote, but does include the closing quote.
3428      Assuming that both source and execution encodings are UTF-8, we have
3429      a run of 25 octets in each, plus the NUL terminator.  */
3430   for (int i = 0; i < 25; i++)
3431     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3432   /* NUL-terminator should use the closing quote at column 35.  */
3433   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3434
3435   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3436 }
3437
3438 /* Test of string literal concatenation.  */
3439
3440 static void
3441 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3442 {
3443   /* Digits 0-9.
3444      .....................000000000.111111.11112222222222
3445      .....................123456789.012345.67890123456789.  */
3446   const char *content = ("        \"01234\" /* non-str */\n"
3447                          "        \"56789\" /* non-str */\n");
3448   lexer_test test (case_, content, NULL);
3449
3450   location_t input_locs[2];
3451
3452   /* Verify that we get the expected tokens back.  */
3453   auto_vec <cpp_string> input_strings;
3454   const cpp_token *tok_a = test.get_token ();
3455   ASSERT_EQ (tok_a->type, CPP_STRING);
3456   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3457   input_strings.safe_push (tok_a->val.str);
3458   input_locs[0] = tok_a->src_loc;
3459
3460   const cpp_token *tok_b = test.get_token ();
3461   ASSERT_EQ (tok_b->type, CPP_STRING);
3462   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3463   input_strings.safe_push (tok_b->val.str);
3464   input_locs[1] = tok_b->src_loc;
3465
3466   /* Verify that cpp_interpret_string works.  */
3467   cpp_string dst_string;
3468   const enum cpp_ttype type = CPP_STRING;
3469   bool result = cpp_interpret_string (test.m_parser,
3470                                       input_strings.address (), 2,
3471                                       &dst_string, type);
3472   ASSERT_TRUE (result);
3473   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3474   free (const_cast <unsigned char *> (dst_string.text));
3475
3476   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
3477   test.m_concats.record_string_concatenation (2, input_locs);
3478
3479   location_t initial_loc = input_locs[0];
3480
3481   /* "01234" on line 1.  */
3482   for (int i = 0; i <= 4; i++)
3483     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3484   /* "56789" in line 2, plus its closing quote for the nul terminator.  */
3485   for (int i = 5; i <= 10; i++)
3486     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3487
3488   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3489 }
3490
3491 /* Another test of string literal concatenation.  */
3492
3493 static void
3494 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3495 {
3496   /* Digits 0-9.
3497      .....................000000000.111.11111112222222
3498      .....................123456789.012.34567890123456.  */
3499   const char *content = ("        \"01\" /* non-str */\n"
3500                          "        \"23\" /* non-str */\n"
3501                          "        \"45\" /* non-str */\n"
3502                          "        \"67\" /* non-str */\n"
3503                          "        \"89\" /* non-str */\n");
3504   lexer_test test (case_, content, NULL);
3505
3506   auto_vec <cpp_string> input_strings;
3507   location_t input_locs[5];
3508
3509   /* Verify that we get the expected tokens back.  */
3510   for (int i = 0; i < 5; i++)
3511     {
3512       const cpp_token *tok = test.get_token ();
3513       ASSERT_EQ (tok->type, CPP_STRING);
3514       input_strings.safe_push (tok->val.str);
3515       input_locs[i] = tok->src_loc;
3516     }
3517
3518   /* Verify that cpp_interpret_string works.  */
3519   cpp_string dst_string;
3520   const enum cpp_ttype type = CPP_STRING;
3521   bool result = cpp_interpret_string (test.m_parser,
3522                                       input_strings.address (), 5,
3523                                       &dst_string, type);
3524   ASSERT_TRUE (result);
3525   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3526   free (const_cast <unsigned char *> (dst_string.text));
3527
3528   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
3529   test.m_concats.record_string_concatenation (5, input_locs);
3530
3531   location_t initial_loc = input_locs[0];
3532
3533   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3534      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3535      and expect get_source_range_for_substring to fail.
3536      However, for a string concatenation test, we can have a case
3537      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3538      but subsequent strings can be after it.
3539      Attempting to detect this within assert_char_at_range
3540      would overcomplicate the logic for the common test cases, so
3541      we detect it here.  */
3542   if (should_have_column_data_p (input_locs[0])
3543       && !should_have_column_data_p (input_locs[4]))
3544     {
3545       /* Verify that get_source_range_for_substring gracefully rejects
3546          this case.  */
3547       source_range actual_range;
3548       const char *err
3549         = get_source_range_for_char (test.m_parser, &test.m_concats,
3550                                      initial_loc, type, 0, &actual_range);
3551       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3552       return;
3553     }
3554
3555   for (int i = 0; i < 5; i++)
3556     for (int j = 0; j < 2; j++)
3557       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3558                             i + 1, 10 + j, 10 + j);
3559
3560   /* NUL-terminator should use the final closing quote at line 5 column 12.  */
3561   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3562
3563   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3564 }
3565
3566 /* Another test of string literal concatenation, this time combined with
3567    various kinds of escaped characters.  */
3568
3569 static void
3570 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3571 {
3572   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3573      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
3574   const char *content
3575     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3576        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3577     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
3578   lexer_test test (case_, content, NULL);
3579
3580   auto_vec <cpp_string> input_strings;
3581   location_t input_locs[4];
3582
3583   /* Verify that we get the expected tokens back.  */
3584   for (int i = 0; i < 4; i++)
3585     {
3586       const cpp_token *tok = test.get_token ();
3587       ASSERT_EQ (tok->type, CPP_STRING);
3588       input_strings.safe_push (tok->val.str);
3589       input_locs[i] = tok->src_loc;
3590     }
3591
3592   /* Verify that cpp_interpret_string works.  */
3593   cpp_string dst_string;
3594   const enum cpp_ttype type = CPP_STRING;
3595   bool result = cpp_interpret_string (test.m_parser,
3596                                       input_strings.address (), 4,
3597                                       &dst_string, type);
3598   ASSERT_TRUE (result);
3599   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3600   free (const_cast <unsigned char *> (dst_string.text));
3601
3602   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
3603   test.m_concats.record_string_concatenation (4, input_locs);
3604
3605   location_t initial_loc = input_locs[0];
3606
3607   for (int i = 0; i <= 4; i++)
3608     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3609   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3610   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3611   for (int i = 7; i <= 9; i++)
3612     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3613
3614   /* NUL-terminator should use the location of the final closing quote.  */
3615   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3616
3617   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3618 }
3619
3620 /* Test of string literal in a macro.  */
3621
3622 static void
3623 test_lexer_string_locations_macro (const line_table_case &case_)
3624 {
3625   /* Digits 0-9.
3626      .....................0000000001111111111.22222222223.
3627      .....................1234567890123456789.01234567890.  */
3628   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
3629                          "  MACRO");
3630   lexer_test test (case_, content, NULL);
3631
3632   /* Verify that we get the expected tokens back.  */
3633   const cpp_token *tok = test.get_token ();
3634   ASSERT_EQ (tok->type, CPP_PADDING);
3635
3636   tok = test.get_token ();
3637   ASSERT_EQ (tok->type, CPP_STRING);
3638   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3639
3640   /* Verify ranges of individual characters.  We ought to
3641      see columns within the macro definition.  */
3642   for (int i = 0; i <= 10; i++)
3643     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3644                           i, 1, 20 + i, 20 + i);
3645
3646   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3647
3648   tok = test.get_token ();
3649   ASSERT_EQ (tok->type, CPP_PADDING);
3650 }
3651
3652 /* Test of stringification of a macro argument.  */
3653
3654 static void
3655 test_lexer_string_locations_stringified_macro_argument
3656   (const line_table_case &case_)
3657 {
3658   /* .....................000000000111111111122222222223.
3659      .....................123456789012345678901234567890.  */
3660   const char *content = ("#define MACRO(X) #X /* non-str */\n"
3661                          "MACRO(foo)\n");
3662   lexer_test test (case_, content, NULL);
3663
3664   /* Verify that we get the expected token back.  */
3665   const cpp_token *tok = test.get_token ();
3666   ASSERT_EQ (tok->type, CPP_PADDING);
3667
3668   tok = test.get_token ();
3669   ASSERT_EQ (tok->type, CPP_STRING);
3670   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3671
3672   /* We don't support getting the location of a stringified macro
3673      argument.  Verify that it fails gracefully.  */
3674   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3675                                   "cpp_interpret_string_1 failed");
3676
3677   tok = test.get_token ();
3678   ASSERT_EQ (tok->type, CPP_PADDING);
3679
3680   tok = test.get_token ();
3681   ASSERT_EQ (tok->type, CPP_PADDING);
3682 }
3683
3684 /* Ensure that we are fail gracefully if something attempts to pass
3685    in a location that isn't a string literal token.  Seen on this code:
3686
3687      const char a[] = " %d ";
3688      __builtin_printf (a, 0.5);
3689                        ^
3690
3691    when c-format.cc erroneously used the indicated one-character
3692    location as the format string location, leading to a read past the
3693    end of a string buffer in cpp_interpret_string_1.  */
3694
3695 static void
3696 test_lexer_string_locations_non_string (const line_table_case &case_)
3697 {
3698   /* .....................000000000111111111122222222223.
3699      .....................123456789012345678901234567890.  */
3700   const char *content = ("         a\n");
3701   lexer_test test (case_, content, NULL);
3702
3703   /* Verify that we get the expected token back.  */
3704   const cpp_token *tok = test.get_token ();
3705   ASSERT_EQ (tok->type, CPP_NAME);
3706   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3707
3708   /* At this point, libcpp is attempting to interpret the name as a
3709      string literal, despite it not starting with a quote.  We don't detect
3710      that, but we should at least fail gracefully.  */
3711   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3712                                   "cpp_interpret_string_1 failed");
3713 }
3714
3715 /* Ensure that we can read substring information for a token which
3716    starts in one linemap and ends in another .  Adapted from
3717    gcc.dg/cpp/pr69985.c.  */
3718
3719 static void
3720 test_lexer_string_locations_long_line (const line_table_case &case_)
3721 {
3722   /* .....................000000.000111111111
3723      .....................123456.789012346789.  */
3724   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
3725                          "     \"0123456789012345678901234567890123456789"
3726                          "0123456789012345678901234567890123456789"
3727                          "0123456789012345678901234567890123456789"
3728                          "0123456789\"\n");
3729
3730   lexer_test test (case_, content, NULL);
3731
3732   /* Verify that we get the expected token back.  */
3733   const cpp_token *tok = test.get_token ();
3734   ASSERT_EQ (tok->type, CPP_STRING);
3735
3736   if (!should_have_column_data_p (line_table->highest_location))
3737     return;
3738
3739   /* Verify ranges of individual characters.  */
3740   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3741   for (int i = 0; i < 131; i++)
3742     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3743                           i, 2, 7 + i, 7 + i);
3744 }
3745
3746 /* Test of locations within a raw string that doesn't contain a newline.  */
3747
3748 static void
3749 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3750 {
3751   /* .....................00.0000000111111111122.
3752      .....................12.3456789012345678901.  */
3753   const char *content = ("R\"foo(0123456789)foo\"\n");
3754   lexer_test test (case_, content, NULL);
3755
3756   /* Verify that we get the expected token back.  */
3757   const cpp_token *tok = test.get_token ();
3758   ASSERT_EQ (tok->type, CPP_STRING);
3759
3760   /* Verify that cpp_interpret_string works.  */
3761   cpp_string dst_string;
3762   const enum cpp_ttype type = CPP_STRING;
3763   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3764                                       &dst_string, type);
3765   ASSERT_TRUE (result);
3766   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3767   free (const_cast <unsigned char *> (dst_string.text));
3768
3769   if (!should_have_column_data_p (line_table->highest_location))
3770     return;
3771
3772   /* 0-9, plus the nil terminator.  */
3773   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3774   for (int i = 0; i < 11; i++)
3775     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3776                           i, 1, 7 + i, 7 + i);
3777 }
3778
3779 /* Test of locations within a raw string that contains a newline.  */
3780
3781 static void
3782 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3783 {
3784   /* .....................00.0000.
3785      .....................12.3456.  */
3786   const char *content = ("R\"foo(\n"
3787   /* .....................00000.
3788      .....................12345.  */
3789                          "hello\n"
3790                          "world\n"
3791   /* .....................00000.
3792      .....................12345.  */
3793                          ")foo\"\n");
3794   lexer_test test (case_, content, NULL);
3795
3796   /* Verify that we get the expected token back.  */
3797   const cpp_token *tok = test.get_token ();
3798   ASSERT_EQ (tok->type, CPP_STRING);
3799
3800   /* Verify that cpp_interpret_string works.  */
3801   cpp_string dst_string;
3802   const enum cpp_ttype type = CPP_STRING;
3803   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3804                                       &dst_string, type);
3805   ASSERT_TRUE (result);
3806   ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3807   free (const_cast <unsigned char *> (dst_string.text));
3808
3809   if (!should_have_column_data_p (line_table->highest_location))
3810     return;
3811
3812   /* Currently we don't support locations within raw strings that
3813      contain newlines.  */
3814   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3815                                   "range endpoints are on different lines");
3816 }
3817
3818 /* Test of parsing an unterminated raw string.  */
3819
3820 static void
3821 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3822 {
3823   const char *content = "R\"ouch()ouCh\" /* etc */";
3824
3825   lexer_diagnostic_sink diagnostics;
3826   lexer_test test (case_, content, &diagnostics);
3827   test.m_implicitly_expect_EOF = false;
3828
3829   /* Attempt to parse the raw string.  */
3830   const cpp_token *tok = test.get_token ();
3831   ASSERT_EQ (tok->type, CPP_EOF);
3832
3833   ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3834   /* We expect the message "unterminated raw string"
3835      in the "cpplib" translation domain.
3836      It's not clear that dgettext is available on all supported hosts,
3837      so this assertion is commented-out for now.
3838        ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3839                      diagnostics.m_diagnostics[0]);
3840   */
3841 }
3842
3843 /* Test of lexing char constants.  */
3844
3845 static void
3846 test_lexer_char_constants (const line_table_case &case_)
3847 {
3848   /* Various char constants.
3849      .....................0000000001111111111.22222222223.
3850      .....................1234567890123456789.01234567890.  */
3851   const char *content = ("         'a'\n"
3852                          "        u'a'\n"
3853                          "        U'a'\n"
3854                          "        L'a'\n"
3855                          "         'abc'\n");
3856   lexer_test test (case_, content, NULL);
3857
3858   /* Verify that we get the expected tokens back.  */
3859   /* 'a'.  */
3860   const cpp_token *tok = test.get_token ();
3861   ASSERT_EQ (tok->type, CPP_CHAR);
3862   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3863
3864   unsigned int chars_seen;
3865   int unsignedp;
3866   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3867                                           &chars_seen, &unsignedp);
3868   ASSERT_EQ (cc, 'a');
3869   ASSERT_EQ (chars_seen, 1);
3870
3871   /* u'a'.  */
3872   tok = test.get_token ();
3873   ASSERT_EQ (tok->type, CPP_CHAR16);
3874   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3875
3876   /* U'a'.  */
3877   tok = test.get_token ();
3878   ASSERT_EQ (tok->type, CPP_CHAR32);
3879   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3880
3881   /* L'a'.  */
3882   tok = test.get_token ();
3883   ASSERT_EQ (tok->type, CPP_WCHAR);
3884   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3885
3886   /* 'abc' (c-char-sequence).  */
3887   tok = test.get_token ();
3888   ASSERT_EQ (tok->type, CPP_CHAR);
3889   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3890 }
3891 /* A table of interesting location_t values, giving one axis of our test
3892    matrix.  */
3893
3894 static const location_t boundary_locations[] = {
3895   /* Zero means "don't override the default values for a new line_table".  */
3896   0,
3897
3898   /* An arbitrary non-zero value that isn't close to one of
3899      the boundary values below.  */
3900   0x10000,
3901
3902   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
3903   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3904   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3905   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3906   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3907   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3908
3909   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
3910   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3911   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3912   LINE_MAP_MAX_LOCATION_WITH_COLS,
3913   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3914   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3915 };
3916
3917 /* Run TESTCASE multiple times, once for each case in our test matrix.  */
3918
3919 void
3920 for_each_line_table_case (void (*testcase) (const line_table_case &))
3921 {
3922   /* As noted above in the description of struct line_table_case,
3923      we want to explore a test matrix of interesting line_table
3924      situations, running various selftests for each case within the
3925      matrix.  */
3926
3927   /* Run all tests with:
3928      (a) line_table->default_range_bits == 0, and
3929      (b) line_table->default_range_bits == 5.  */
3930   int num_cases_tested = 0;
3931   for (int default_range_bits = 0; default_range_bits <= 5;
3932        default_range_bits += 5)
3933     {
3934       /* ...and use each of the "interesting" location values as
3935          the starting location within line_table.  */
3936       const int num_boundary_locations = ARRAY_SIZE (boundary_locations);
3937       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3938         {
3939           line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3940
3941           testcase (c);
3942
3943           num_cases_tested++;
3944         }
3945     }
3946
3947   /* Verify that we fully covered the test matrix.  */
3948   ASSERT_EQ (num_cases_tested, 2 * 12);
3949 }
3950
3951 /* Verify that when presented with a consecutive pair of locations with
3952    a very large line offset, we don't attempt to consolidate them into
3953    a single ordinary linemap where the line offsets within the line map
3954    would lead to overflow (PR lto/88147).  */
3955
3956 static void
3957 test_line_offset_overflow ()
3958 {
3959   line_table_test ltt (line_table_case (5, 0));
3960
3961   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3962   linemap_line_start (line_table, 1, 100);
3963   location_t loc_a = linemap_line_start (line_table, 2578, 255);
3964   assert_loceq ("foo.c", 2578, 0, loc_a);
3965
3966   const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3967   ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3968   ASSERT_EQ (ordmap_a->m_range_bits, 5);
3969
3970   location_t loc_b = linemap_line_start (line_table, 404198, 512);
3971   assert_loceq ("foo.c", 404198, 0, loc_b);
3972
3973   /* We should have started a new linemap, rather than attempting to store
3974      a very large line offset.  */
3975   const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3976   ASSERT_NE (ordmap_a, ordmap_b);
3977 }
3978
3979 void test_cpp_utf8 ()
3980 {
3981   const int def_tabstop = 8;
3982   cpp_char_column_policy policy (def_tabstop, cpp_wcwidth);
3983
3984   /* Verify that wcwidth of invalid UTF-8 or control bytes is 1.  */
3985   {
3986     int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, policy);
3987     ASSERT_EQ (8, w_bad);
3988     int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, policy);
3989     ASSERT_EQ (5, w_ctrl);
3990   }
3991
3992   /* Verify that wcwidth of valid UTF-8 is as expected.  */
3993   {
3994     const int w_pi = cpp_display_width ("\xcf\x80", 2, policy);
3995     ASSERT_EQ (1, w_pi);
3996     const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, policy);
3997     ASSERT_EQ (2, w_emoji);
3998     const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
3999                                                         policy);
4000     ASSERT_EQ (1, w_umlaut_precomposed);
4001     const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
4002                                                       policy);
4003     ASSERT_EQ (1, w_umlaut_combining);
4004     const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, policy);
4005     ASSERT_EQ (2, w_han);
4006     const int w_ascii = cpp_display_width ("GCC", 3, policy);
4007     ASSERT_EQ (3, w_ascii);
4008     const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
4009                                            "\x9f! \xe4\xb8\xba y\xcc\x88",
4010                                            24, policy);
4011     ASSERT_EQ (18, w_mixed);
4012   }
4013
4014   /* Verify that display width properly expands tabs.  */
4015   {
4016     const char *tstr = "\tabc\td";
4017     ASSERT_EQ (6, cpp_display_width (tstr, 6,
4018                                      cpp_char_column_policy (1, cpp_wcwidth)));
4019     ASSERT_EQ (10, cpp_display_width (tstr, 6,
4020                                       cpp_char_column_policy (3, cpp_wcwidth)));
4021     ASSERT_EQ (17, cpp_display_width (tstr, 6,
4022                                       cpp_char_column_policy (8, cpp_wcwidth)));
4023     ASSERT_EQ (1,
4024                cpp_display_column_to_byte_column
4025                  (tstr, 6, 7, cpp_char_column_policy (8, cpp_wcwidth)));
4026   }
4027
4028   /* Verify that cpp_byte_column_to_display_column can go past the end,
4029      and similar edge cases.  */
4030   {
4031     const char *str
4032       /* Display columns.
4033          111111112345  */
4034       = "\xcf\x80 abc";
4035       /* 111122223456
4036          Byte columns.  */
4037
4038     ASSERT_EQ (5, cpp_display_width (str, 6, policy));
4039     ASSERT_EQ (105,
4040                cpp_byte_column_to_display_column (str, 6, 106, policy));
4041     ASSERT_EQ (10000,
4042                cpp_byte_column_to_display_column (NULL, 0, 10000, policy));
4043     ASSERT_EQ (0,
4044                cpp_byte_column_to_display_column (NULL, 10000, 0, policy));
4045   }
4046
4047   /* Verify that cpp_display_column_to_byte_column can go past the end,
4048      and similar edge cases, and check invertibility.  */
4049   {
4050     const char *str
4051       /* Display columns.
4052          000000000000000000000000000000000000011
4053          111111112222222234444444455555555678901  */
4054       = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
4055       /* 000000000000000000000000000000000111111
4056          111122223333444456666777788889999012345
4057          Byte columns.  */
4058     ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, policy));
4059     ASSERT_EQ (15,
4060                cpp_display_column_to_byte_column (str, 15, 11, policy));
4061     ASSERT_EQ (115,
4062                cpp_display_column_to_byte_column (str, 15, 111, policy));
4063     ASSERT_EQ (10000,
4064                cpp_display_column_to_byte_column (NULL, 0, 10000, policy));
4065     ASSERT_EQ (0,
4066                cpp_display_column_to_byte_column (NULL, 10000, 0, policy));
4067
4068     /* Verify that we do not interrupt a UTF-8 sequence.  */
4069     ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, policy));
4070
4071     for (int byte_col = 1; byte_col <= 15; ++byte_col)
4072       {
4073         const int disp_col
4074           = cpp_byte_column_to_display_column (str, 15, byte_col, policy);
4075         const int byte_col2
4076           = cpp_display_column_to_byte_column (str, 15, disp_col, policy);
4077
4078         /* If we ask for the display column in the middle of a UTF-8
4079            sequence, it will return the length of the partial sequence,
4080            matching the behavior of GCC before display column support.
4081            Otherwise check the round trip was successful.  */
4082         if (byte_col < 4)
4083           ASSERT_EQ (byte_col, disp_col);
4084         else if (byte_col >= 6 && byte_col < 9)
4085           ASSERT_EQ (3 + (byte_col - 5), disp_col);
4086         else
4087           ASSERT_EQ (byte_col2, byte_col);
4088       }
4089   }
4090 }
4091
4092 static bool
4093 check_cpp_valid_utf8_p (const char *str)
4094 {
4095   return cpp_valid_utf8_p (str, strlen (str));
4096 }
4097
4098 /* Check that cpp_valid_utf8_p works as expected.  */
4099
4100 static void
4101 test_cpp_valid_utf8_p ()
4102 {
4103   ASSERT_TRUE (check_cpp_valid_utf8_p ("hello world"));
4104
4105   /* 2-byte char (pi).  */
4106   ASSERT_TRUE (check_cpp_valid_utf8_p("\xcf\x80"));
4107
4108   /* 3-byte chars (the Japanese word "mojibake").  */
4109   ASSERT_TRUE (check_cpp_valid_utf8_p
4110                (
4111                 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
4112                    UTF-8: 0xE6 0x96 0x87
4113                    C octal escaped UTF-8: \346\226\207.  */
4114                 "\346\226\207"
4115                 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
4116                    UTF-8: 0xE5 0xAD 0x97
4117                    C octal escaped UTF-8: \345\255\227.  */
4118                 "\345\255\227"
4119                 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
4120                    UTF-8: 0xE5 0x8C 0x96
4121                    C octal escaped UTF-8: \345\214\226.  */
4122                 "\345\214\226"
4123                 /* U+3051 HIRAGANA LETTER KE
4124                    UTF-8: 0xE3 0x81 0x91
4125                    C octal escaped UTF-8: \343\201\221.  */
4126                 "\343\201\221"));
4127
4128   /* 4-byte char: an emoji.  */
4129   ASSERT_TRUE (check_cpp_valid_utf8_p ("\xf0\x9f\x98\x82"));
4130
4131   /* Control codes, including the NUL byte.  */
4132   ASSERT_TRUE (cpp_valid_utf8_p ("\r\n\v\0\1", 5));
4133
4134   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xf0!\x9f!\x98!\x82!"));
4135
4136   /* Unexpected continuation bytes.  */
4137   for (unsigned char continuation_byte = 0x80;
4138        continuation_byte <= 0xbf;
4139        continuation_byte++)
4140     ASSERT_FALSE (cpp_valid_utf8_p ((const char *)&continuation_byte, 1));
4141
4142   /* "Lonely start characters" for 2-byte sequences.  */
4143   {
4144     unsigned char buf[2];
4145     buf[1] = ' ';
4146     for (buf[0] = 0xc0;
4147          buf[0] <= 0xdf;
4148          buf[0]++)
4149       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4150   }
4151
4152   /* "Lonely start characters" for 3-byte sequences.  */
4153   {
4154     unsigned char buf[2];
4155     buf[1] = ' ';
4156     for (buf[0] = 0xe0;
4157          buf[0] <= 0xef;
4158          buf[0]++)
4159       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4160   }
4161
4162   /* "Lonely start characters" for 4-byte sequences.  */
4163   {
4164     unsigned char buf[2];
4165     buf[1] = ' ';
4166     for (buf[0] = 0xf0;
4167          buf[0] <= 0xf4;
4168          buf[0]++)
4169       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4170   }
4171
4172   /* Invalid start characters (formerly valid for 5-byte and 6-byte
4173      sequences).  */
4174   {
4175     unsigned char buf[2];
4176     buf[1] = ' ';
4177     for (buf[0] = 0xf5;
4178          buf[0] <= 0xfd;
4179          buf[0]++)
4180       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4181   }
4182
4183   /* Impossible bytes.  */
4184   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc0"));
4185   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc1"));
4186   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xfe"));
4187   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xff"));
4188 }
4189
4190 /* Run all of the selftests within this file.  */
4191
4192 void
4193 input_cc_tests ()
4194 {
4195   test_linenum_comparisons ();
4196   test_should_have_column_data_p ();
4197   test_unknown_location ();
4198   test_builtins ();
4199   for_each_line_table_case (test_make_location_nonpure_range_endpoints);
4200
4201   for_each_line_table_case (test_accessing_ordinary_linemaps);
4202   for_each_line_table_case (test_lexer);
4203   for_each_line_table_case (test_lexer_string_locations_simple);
4204   for_each_line_table_case (test_lexer_string_locations_ebcdic);
4205   for_each_line_table_case (test_lexer_string_locations_hex);
4206   for_each_line_table_case (test_lexer_string_locations_oct);
4207   for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
4208   for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
4209   for_each_line_table_case (test_lexer_string_locations_ucn4);
4210   for_each_line_table_case (test_lexer_string_locations_ucn8);
4211   for_each_line_table_case (test_lexer_string_locations_wide_string);
4212   for_each_line_table_case (test_lexer_string_locations_string16);
4213   for_each_line_table_case (test_lexer_string_locations_string32);
4214   for_each_line_table_case (test_lexer_string_locations_u8);
4215   for_each_line_table_case (test_lexer_string_locations_utf8_source);
4216   for_each_line_table_case (test_lexer_string_locations_concatenation_1);
4217   for_each_line_table_case (test_lexer_string_locations_concatenation_2);
4218   for_each_line_table_case (test_lexer_string_locations_concatenation_3);
4219   for_each_line_table_case (test_lexer_string_locations_macro);
4220   for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
4221   for_each_line_table_case (test_lexer_string_locations_non_string);
4222   for_each_line_table_case (test_lexer_string_locations_long_line);
4223   for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
4224   for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
4225   for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
4226   for_each_line_table_case (test_lexer_char_constants);
4227
4228   test_reading_source_line ();
4229
4230   test_line_offset_overflow ();
4231
4232   test_cpp_utf8 ();
4233   test_cpp_valid_utf8_p ();
4234 }
4235
4236 } // namespace selftest
4237
4238 #endif /* CHECKING_P */