gcc/input.cc

   1 /* Data and functions related to line maps and input files.
   2    Copyright (C) 2004-2023 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "intl.h"
  24 #include "diagnostic.h"
  25 #include "selftest.h"
  26 #include "cpplib.h"
  27
  28 #ifndef HAVE_ICONV
  29 #define HAVE_ICONV 0
  30 #endif
  31
  32 const char *
  33 special_fname_builtin ()
  34 {
  35   return _("<built-in>");
  36 }
  37
  38 /* Input charset configuration.  */
  39 static const char *default_charset_callback (const char *)
  40 {
  41   return nullptr;
  42 }
  43
  44 void
  45 file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
  46                                       bool should_skip_bom)
  47 {
  48   in_context.ccb = (ccb ? ccb : default_charset_callback);
  49   in_context.should_skip_bom = should_skip_bom;
  50 }
  51
  52 /* This is a cache used by get_next_line to store the content of a
  53    file to be searched for file lines.  */
  54 class file_cache_slot
  55 {
  56 public:
  57   file_cache_slot ();
  58   ~file_cache_slot ();
  59
  60   bool read_line_num (size_t line_num,
  61                       char ** line, ssize_t *line_len);
  62
  63   /* Accessors.  */
  64   const char *get_file_path () const { return m_file_path; }
  65   unsigned get_use_count () const { return m_use_count; }
  66   bool missing_trailing_newline_p () const
  67   {
  68     return m_missing_trailing_newline;
  69   }
  70   char_span get_full_file_content ();
  71
  72   void inc_use_count () { m_use_count++; }
  73
  74   bool create (const file_cache::input_context &in_context,
  75                const char *file_path, FILE *fp, unsigned highest_use_count);
  76   void evict ();
  77
  78  private:
  79   /* These are information used to store a line boundary.  */
  80   class line_info
  81   {
  82   public:
  83     /* The line number.  It starts from 1.  */
  84     size_t line_num;
  85
  86     /* The position (byte count) of the beginning of the line,
  87        relative to the file data pointer.  This starts at zero.  */
  88     size_t start_pos;
  89
  90     /* The position (byte count) of the last byte of the line.  This
  91        normally points to the '\n' character, or to one byte after the
  92        last byte of the file, if the file doesn't contain a '\n'
  93        character.  */
  94     size_t end_pos;
  95
  96     line_info (size_t l, size_t s, size_t e)
  97       : line_num (l), start_pos (s), end_pos (e)
  98     {}
  99
 100     line_info ()
 101       :line_num (0), start_pos (0), end_pos (0)
 102     {}
 103   };
 104
 105   bool needs_read_p () const;
 106   bool needs_grow_p () const;
 107   void maybe_grow ();
 108   bool read_data ();
 109   bool maybe_read_data ();
 110   bool get_next_line (char **line, ssize_t *line_len);
 111   bool read_next_line (char ** line, ssize_t *line_len);
 112   bool goto_next_line ();
 113
 114   static const size_t buffer_size = 4 * 1024;
 115   static const size_t line_record_size = 100;
 116
 117   /* The number of time this file has been accessed.  This is used
 118      to designate which file cache to evict from the cache
 119      array.  */
 120   unsigned m_use_count;
 121
 122   /* The file_path is the key for identifying a particular file in
 123      the cache.
 124      For libcpp-using code, the underlying buffer for this field is
 125      owned by the corresponding _cpp_file within the cpp_reader.  */
 126   const char *m_file_path;
 127
 128   FILE *m_fp;
 129
 130   /* This points to the content of the file that we've read so
 131      far.  */
 132   char *m_data;
 133
 134   /* The allocated buffer to be freed may start a little earlier than DATA,
 135      e.g. if a UTF8 BOM was skipped at the beginning.  */
 136   int m_alloc_offset;
 137
 138   /*  The size of the DATA array above.*/
 139   size_t m_size;
 140
 141   /* The number of bytes read from the underlying file so far.  This
 142      must be less (or equal) than SIZE above.  */
 143   size_t m_nb_read;
 144
 145   /* The index of the beginning of the current line.  */
 146   size_t m_line_start_idx;
 147
 148   /* The number of the previous line read.  This starts at 1.  Zero
 149      means we've read no line so far.  */
 150   size_t m_line_num;
 151
 152   /* This is the total number of lines of the current file.  At the
 153      moment, we try to get this information from the line map
 154      subsystem.  Note that this is just a hint.  When using the C++
 155      front-end, this hint is correct because the input file is then
 156      completely tokenized before parsing starts; so the line map knows
 157      the number of lines before compilation really starts.  For e.g,
 158      the C front-end, it can happen that we start emitting diagnostics
 159      before the line map has seen the end of the file.  */
 160   size_t m_total_lines;
 161
 162   /* Could this file be missing a trailing newline on its final line?
 163      Initially true (to cope with empty files), set to true/false
 164      as each line is read.  */
 165   bool m_missing_trailing_newline;
 166
 167   /* This is a record of the beginning and end of the lines we've seen
 168      while reading the file.  This is useful to avoid walking the data
 169      from the beginning when we are asked to read a line that is
 170      before LINE_START_IDX above.  Note that the maximum size of this
 171      record is line_record_size, so that the memory consumption
 172      doesn't explode.  We thus scale total_lines down to
 173      line_record_size.  */
 174   vec<line_info, va_heap> m_line_record;
 175
 176   void offset_buffer (int offset)
 177   {
 178     gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
 179                 : (size_t) offset <= m_size);
 180     gcc_assert (m_data);
 181     m_alloc_offset += offset;
 182     m_data += offset;
 183     m_size -= offset;
 184   }
 185
 186 };
 187
/* Current position in real source file.  */

location_t input_location = UNKNOWN_LOCATION;

/* The set of line maps handed to the linemap queries made in this
   file (see e.g. total_lines_num).  */

class line_maps *line_table;

/* A stashed copy of "line_table" for use by selftest::line_table_test.
   This needs to be a global so that it can be a GC root, and thus
   prevent the stashed copy from being garbage-collected if the GC runs
   during a line_table_test.  */

class line_maps *saved_line_table;
 200
 201 /* Expand the source location LOC into a human readable location.  If
 202    LOC resolves to a builtin location, the file name of the readable
 203    location is set to the string "<built-in>". If EXPANSION_POINT_P is
 204    TRUE and LOC is virtual, then it is resolved to the expansion
 205    point of the involved macro.  Otherwise, it is resolved to the
 206    spelling location of the token.
 207
 208    When resolving to the spelling location of the token, if the
 209    resulting location is for a built-in location (that is, it has no
 210    associated line/column) in the context of a macro expansion, the
 211    returned location is the first one (while unwinding the macro
 212    location towards its expansion point) that is in real source
 213    code.
 214
 215    ASPECT controls which part of the location to use.  */
 216
 217 static expanded_location
 218 expand_location_1 (const line_maps *set,
 219                    location_t loc,
 220                    bool expansion_point_p,
 221                    enum location_aspect aspect)
 222 {
 223   expanded_location xloc;
 224   const line_map_ordinary *map;
 225   enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
 226   tree block = NULL;
 227
 228   if (IS_ADHOC_LOC (loc))
 229     {
 230       block = LOCATION_BLOCK (loc);
 231       loc = LOCATION_LOCUS (loc);
 232     }
 233
 234   memset (&xloc, 0, sizeof (xloc));
 235
 236   if (loc >= RESERVED_LOCATION_COUNT)
 237     {
 238       if (!expansion_point_p)
 239         {
 240           /* We want to resolve LOC to its spelling location.
 241
 242              But if that spelling location is a reserved location that
 243              appears in the context of a macro expansion (like for a
 244              location for a built-in token), let's consider the first
 245              location (toward the expansion point) that is not reserved;
 246              that is, the first location that is in real source code.  */
 247           loc = linemap_unwind_to_first_non_reserved_loc (set,
 248                                                           loc, NULL);
 249           lrk = LRK_SPELLING_LOCATION;
 250         }
 251       loc = linemap_resolve_location (set, loc, lrk, &map);
 252
 253       /* loc is now either in an ordinary map, or is a reserved location.
 254          If it is a compound location, the caret is in a spelling location,
 255          but the start/finish might still be a virtual location.
 256          Depending of what the caller asked for, we may need to recurse
 257          one level in order to resolve any virtual locations in the
 258          end-points.  */
 259       switch (aspect)
 260         {
 261         default:
 262           gcc_unreachable ();
 263           /* Fall through.  */
 264         case LOCATION_ASPECT_CARET:
 265           break;
 266         case LOCATION_ASPECT_START:
 267           {
 268             location_t start = get_start (loc);
 269             if (start != loc)
 270               return expand_location_1 (set, start, expansion_point_p, aspect);
 271           }
 272           break;
 273         case LOCATION_ASPECT_FINISH:
 274           {
 275             location_t finish = get_finish (loc);
 276             if (finish != loc)
 277               return expand_location_1 (set, finish, expansion_point_p, aspect);
 278           }
 279           break;
 280         }
 281       xloc = linemap_expand_location (set, map, loc);
 282     }
 283
 284   xloc.data = block;
 285   if (loc <= BUILTINS_LOCATION)
 286     xloc.file = loc == UNKNOWN_LOCATION ? NULL : special_fname_builtin ();
 287
 288   return xloc;
 289 }
 290
 291 /* Initialize the set of cache used for files accessed by caret
 292    diagnostic.  */
 293
 294 static void
 295 diagnostic_file_cache_init (void)
 296 {
 297   gcc_assert (global_dc);
 298   global_dc->file_cache_init ();
 299 }
 300
 301 void
 302 diagnostic_context::file_cache_init ()
 303 {
 304   if (m_file_cache == nullptr)
 305     m_file_cache = new file_cache ();
 306 }
 307
 308 /* Return the total lines number that have been read so far by the
 309    line map (in the preprocessor) so far.  For languages like C++ that
 310    entirely preprocess the input file before starting to parse, this
 311    equals the actual number of lines of the file.  */
 312
 313 static size_t
 314 total_lines_num (const char *file_path)
 315 {
 316   size_t r = 0;
 317   location_t l = 0;
 318   if (linemap_get_file_highest_location (line_table, file_path, &l))
 319     {
 320       gcc_assert (l >= RESERVED_LOCATION_COUNT);
 321       expanded_location xloc = expand_location (l);
 322       r = xloc.line;
 323     }
 324   return r;
 325 }
 326
 327 /* Lookup the cache used for the content of a given file accessed by
 328    caret diagnostic.  Return the found cached file, or NULL if no
 329    cached file was found.  */
 330
 331 file_cache_slot *
 332 file_cache::lookup_file (const char *file_path)
 333 {
 334   gcc_assert (file_path);
 335
 336   /* This will contain the found cached file.  */
 337   file_cache_slot *r = NULL;
 338   for (unsigned i = 0; i < num_file_slots; ++i)
 339     {
 340       file_cache_slot *c = &m_file_slots[i];
 341       if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
 342         {
 343           c->inc_use_count ();
 344           r = c;
 345         }
 346     }
 347
 348   if (r)
 349     r->inc_use_count ();
 350
 351   return r;
 352 }
 353
 354 /* Purge any mention of FILENAME from the cache of files used for
 355    printing source code.  For use in selftests when working
 356    with tempfiles.  */
 357
 358 void
 359 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
 360 {
 361   gcc_assert (file_path);
 362
 363   auto file_cache = global_dc->get_file_cache ();
 364   if (!file_cache)
 365     return;
 366   file_cache->forcibly_evict_file (file_path);
 367 }
 368
 369 void
 370 file_cache::forcibly_evict_file (const char *file_path)
 371 {
 372   gcc_assert (file_path);
 373
 374   file_cache_slot *r = lookup_file (file_path);
 375   if (!r)
 376     /* Not found.  */
 377     return;
 378
 379   r->evict ();
 380 }
 381
 382 void
 383 file_cache_slot::evict ()
 384 {
 385   m_file_path = NULL;
 386   if (m_fp)
 387     fclose (m_fp);
 388   m_fp = NULL;
 389   m_nb_read = 0;
 390   m_line_start_idx = 0;
 391   m_line_num = 0;
 392   m_line_record.truncate (0);
 393   m_use_count = 0;
 394   m_total_lines = 0;
 395   m_missing_trailing_newline = true;
 396 }
 397
 398 /* Return the file cache that has been less used, recently, or the
 399    first empty one.  If HIGHEST_USE_COUNT is non-null,
 400    *HIGHEST_USE_COUNT is set to the highest use count of the entries
 401    in the cache table.  */
 402
 403 file_cache_slot*
 404 file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
 405 {
 406   diagnostic_file_cache_init ();
 407
 408   file_cache_slot *to_evict = &m_file_slots[0];
 409   unsigned huc = to_evict->get_use_count ();
 410   for (unsigned i = 1; i < num_file_slots; ++i)
 411     {
 412       file_cache_slot *c = &m_file_slots[i];
 413       bool c_is_empty = (c->get_file_path () == NULL);
 414
 415       if (c->get_use_count () < to_evict->get_use_count ()
 416           || (to_evict->get_file_path () && c_is_empty))
 417         /* We evict C because it's either an entry with a lower use
 418            count or one that is empty.  */
 419         to_evict = c;
 420
 421       if (huc < c->get_use_count ())
 422         huc = c->get_use_count ();
 423
 424       if (c_is_empty)
 425         /* We've reached the end of the cache; subsequent elements are
 426            all empty.  */
 427         break;
 428     }
 429
 430   if (highest_use_count)
 431     *highest_use_count = huc;
 432
 433   return to_evict;
 434 }
 435
 436 /* Create the cache used for the content of a given file to be
 437    accessed by caret diagnostic.  This cache is added to an array of
 438    cache and can be retrieved by lookup_file_in_cache_tab.  This
 439    function returns the created cache.  Note that only the last
 440    num_file_slots files are cached.
 441
 442    This can return nullptr if the FILE_PATH can't be opened for
 443    reading, or if the content can't be converted to the input_charset.  */
 444
 445 file_cache_slot*
 446 file_cache::add_file (const char *file_path)
 447 {
 448
 449   FILE *fp = fopen (file_path, "r");
 450   if (fp == NULL)
 451     return NULL;
 452
 453   unsigned highest_use_count = 0;
 454   file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
 455   if (!r->create (in_context, file_path, fp, highest_use_count))
 456     return NULL;
 457   return r;
 458 }
 459
 460 /* Get a borrowed char_span to the full content of this file
 461    as decoded according to the input charset, encoded as UTF-8.  */
 462
 463 char_span
 464 file_cache_slot::get_full_file_content ()
 465 {
 466   char *line;
 467   ssize_t line_len;
 468   while (get_next_line (&line, &line_len))
 469     {
 470     }
 471   return char_span (m_data, m_nb_read);
 472 }
 473
 474 /* Populate this slot for use on FILE_PATH and FP, dropping any
 475    existing cached content within it.  */
 476
 477 bool
 478 file_cache_slot::create (const file_cache::input_context &in_context,
 479                          const char *file_path, FILE *fp,
 480                          unsigned highest_use_count)
 481 {
 482   m_file_path = file_path;
 483   if (m_fp)
 484     fclose (m_fp);
 485   m_fp = fp;
 486   if (m_alloc_offset)
 487     offset_buffer (-m_alloc_offset);
 488   m_nb_read = 0;
 489   m_line_start_idx = 0;
 490   m_line_num = 0;
 491   m_line_record.truncate (0);
 492   /* Ensure that this cache entry doesn't get evicted next time
 493      add_file_to_cache_tab is called.  */
 494   m_use_count = ++highest_use_count;
 495   m_total_lines = total_lines_num (file_path);
 496   m_missing_trailing_newline = true;
 497
 498
 499   /* Check the input configuration to determine if we need to do any
 500      transformations, such as charset conversion or BOM skipping.  */
 501   if (const char *input_charset = in_context.ccb (file_path))
 502     {
 503       /* Need a full-blown conversion of the input charset.  */
 504       fclose (m_fp);
 505       m_fp = NULL;
 506       const cpp_converted_source cs
 507         = cpp_get_converted_source (file_path, input_charset);
 508       if (!cs.data)
 509         return false;
 510       if (m_data)
 511         XDELETEVEC (m_data);
 512       m_data = cs.data;
 513       m_nb_read = m_size = cs.len;
 514       m_alloc_offset = cs.data - cs.to_free;
 515     }
 516   else if (in_context.should_skip_bom)
 517     {
 518       if (read_data ())
 519         {
 520           const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
 521           offset_buffer (offset);
 522           m_nb_read -= offset;
 523         }
 524     }
 525
 526   return true;
 527 }
 528
 529 /* file_cache's ctor.  */
 530
 531 file_cache::file_cache ()
 532 : m_file_slots (new file_cache_slot[num_file_slots])
 533 {
 534   initialize_input_context (nullptr, false);
 535 }
 536
 537 /* file_cache's dtor.  */
 538
 539 file_cache::~file_cache ()
 540 {
 541   delete[] m_file_slots;
 542 }
 543
 544 /* Lookup the cache used for the content of a given file accessed by
 545    caret diagnostic.  If no cached file was found, create a new cache
 546    for this file, add it to the array of cached file and return
 547    it.
 548
 549    This can return nullptr on a cache miss if FILE_PATH can't be opened for
 550    reading, or if the content can't be converted to the input_charset.  */
 551
 552 file_cache_slot*
 553 file_cache::lookup_or_add_file (const char *file_path)
 554 {
 555   file_cache_slot *r = lookup_file (file_path);
 556   if (r == NULL)
 557     r = add_file (file_path);
 558   return r;
 559 }
 560
 561 /* Default constructor for a cache of file used by caret
 562    diagnostic.  */
 563
 564 file_cache_slot::file_cache_slot ()
 565 : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
 566   m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
 567   m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true)
 568 {
 569   m_line_record.create (0);
 570 }
 571
 572 /* Destructor for a cache of file used by caret diagnostic.  */
 573
 574 file_cache_slot::~file_cache_slot ()
 575 {
 576   if (m_fp)
 577     {
 578       fclose (m_fp);
 579       m_fp = NULL;
 580     }
 581   if (m_data)
 582     {
 583       offset_buffer (-m_alloc_offset);
 584       XDELETEVEC (m_data);
 585       m_data = 0;
 586     }
 587   m_line_record.release ();
 588 }
 589
 590 /* Returns TRUE iff the cache would need to be filled with data coming
 591    from the file.  That is, either the cache is empty or full or the
 592    current line is empty.  Note that if the cache is full, it would
 593    need to be extended and filled again.  */
 594
 595 bool
 596 file_cache_slot::needs_read_p () const
 597 {
 598   return m_fp && (m_nb_read == 0
 599           || m_nb_read == m_size
 600           || (m_line_start_idx >= m_nb_read - 1));
 601 }
 602
 603 /*  Return TRUE iff the cache is full and thus needs to be
 604     extended.  */
 605
 606 bool
 607 file_cache_slot::needs_grow_p () const
 608 {
 609   return m_nb_read == m_size;
 610 }
 611
 612 /* Grow the cache if it needs to be extended.  */
 613
 614 void
 615 file_cache_slot::maybe_grow ()
 616 {
 617   if (!needs_grow_p ())
 618     return;
 619
 620   if (!m_data)
 621     {
 622       gcc_assert (m_size == 0 && m_alloc_offset == 0);
 623       m_size = buffer_size;
 624       m_data = XNEWVEC (char, m_size);
 625     }
 626   else
 627     {
 628       const int offset = m_alloc_offset;
 629       offset_buffer (-offset);
 630       m_size *= 2;
 631       m_data = XRESIZEVEC (char, m_data, m_size);
 632       offset_buffer (offset);
 633     }
 634 }
 635
 636 /*  Read more data into the cache.  Extends the cache if need be.
 637     Returns TRUE iff new data could be read.  */
 638
 639 bool
 640 file_cache_slot::read_data ()
 641 {
 642   if (feof (m_fp) || ferror (m_fp))
 643     return false;
 644
 645   maybe_grow ();
 646
 647   char * from = m_data + m_nb_read;
 648   size_t to_read = m_size - m_nb_read;
 649   size_t nb_read = fread (from, 1, to_read, m_fp);
 650
 651   if (ferror (m_fp))
 652     return false;
 653
 654   m_nb_read += nb_read;
 655   return !!nb_read;
 656 }
 657
 658 /* Read new data iff the cache needs to be filled with more data
 659    coming from the file FP.  Return TRUE iff the cache was filled with
 660    mode data.  */
 661
 662 bool
 663 file_cache_slot::maybe_read_data ()
 664 {
 665   if (!needs_read_p ())
 666     return false;
 667   return read_data ();
 668 }
 669
 670 /* Helper function for file_cache_slot::get_next_line (), to find the end of
 671    the next line.  Returns with the memchr convention, i.e. nullptr if a line
 672    terminator was not found.  We need to determine line endings in the same
 673    manner that libcpp does: any of \n, \r\n, or \r is a line ending.  */
 674
 675 static char *
 676 find_end_of_line (char *s, size_t len)
 677 {
 678   for (const auto end = s + len; s != end; ++s)
 679     {
 680       if (*s == '\n')
 681         return s;
 682       if (*s == '\r')
 683         {
 684           const auto next = s + 1;
 685           if (next == end)
 686             {
 687               /* Don't find the line ending if \r is the very last character
 688                  in the buffer; we do not know if it's the end of the file or
 689                  just the end of what has been read so far, and we wouldn't
 690                  want to break in the middle of what's actually a \r\n
 691                  sequence.  Instead, we will handle the case of a file ending
 692                  in a \r later.  */
 693               break;
 694             }
 695           return (*next == '\n' ? next : s);
 696         }
 697     }
 698   return nullptr;
 699 }
 700
 701 /* Read a new line from file FP, using C as a cache for the data
 702    coming from the file.  Upon successful completion, *LINE is set to
 703    the beginning of the line found.  *LINE points directly in the
 704    line cache and is only valid until the next call of get_next_line.
 705    *LINE_LEN is set to the length of the line.  Note that the line
 706    does not contain any terminal delimiter.  This function returns
 707    true if some data was read or process from the cache, false
 708    otherwise.  Note that subsequent calls to get_next_line might
 709    make the content of *LINE invalid.  */
 710
 711 bool
 712 file_cache_slot::get_next_line (char **line, ssize_t *line_len)
 713 {
 714   /* Fill the cache with data to process.  */
 715   maybe_read_data ();
 716
 717   size_t remaining_size = m_nb_read - m_line_start_idx;
 718   if (remaining_size == 0)
 719     /* There is no more data to process.  */
 720     return false;
 721
 722   char *line_start = m_data + m_line_start_idx;
 723
 724   char *next_line_start = NULL;
 725   size_t len = 0;
 726   char *line_end = find_end_of_line (line_start, remaining_size);
 727   if (line_end == NULL)
 728     {
 729       /* We haven't found an end-of-line delimiter in the cache.
 730          Fill the cache with more data from the file and look again.  */
 731       while (maybe_read_data ())
 732         {
 733           line_start = m_data + m_line_start_idx;
 734           remaining_size = m_nb_read - m_line_start_idx;
 735           line_end = find_end_of_line (line_start, remaining_size);
 736           if (line_end != NULL)
 737             {
 738               next_line_start = line_end + 1;
 739               break;
 740             }
 741         }
 742       if (line_end == NULL)
 743         {
 744           /* We've loaded all the file into the cache and still no
 745              terminator.  Let's say the line ends up at one byte past the
 746              end of the file.  This is to stay consistent with the case
 747              of when the line ends up with a terminator and line_end points to
 748              that.  That consistency is useful below in the len calculation.
 749
 750              If the file ends in a \r, we didn't identify it as a line
 751              terminator above, so do that now instead.  */
 752           line_end = m_data + m_nb_read;
 753           if (m_nb_read && line_end[-1] == '\r')
 754             {
 755               --line_end;
 756               m_missing_trailing_newline = false;
 757             }
 758           else
 759             m_missing_trailing_newline = true;
 760         }
 761       else
 762         m_missing_trailing_newline = false;
 763     }
 764   else
 765     {
 766       next_line_start = line_end + 1;
 767       m_missing_trailing_newline = false;
 768     }
 769
 770   if (m_fp && ferror (m_fp))
 771     return false;
 772
 773   /* At this point, we've found the end of the of line.  It either points to
 774      the line terminator or to one byte after the last byte of the file.  */
 775   gcc_assert (line_end != NULL);
 776
 777   len = line_end - line_start;
 778
 779   if (m_line_start_idx < m_nb_read)
 780     *line = line_start;
 781
 782   ++m_line_num;
 783
 784   /* Before we update our line record, make sure the hint about the
 785      total number of lines of the file is correct.  If it's not, then
 786      we give up recording line boundaries from now on.  */
 787   bool update_line_record = true;
 788   if (m_line_num > m_total_lines)
 789     update_line_record = false;
 790
 791     /* Now update our line record so that re-reading lines from the
 792      before m_line_start_idx is faster.  */
 793   if (update_line_record
 794       && m_line_record.length () < line_record_size)
 795     {
 796       /* If the file lines fits in the line record, we just record all
 797          its lines ...*/
 798       if (m_total_lines <= line_record_size
 799           && m_line_num > m_line_record.length ())
 800         m_line_record.safe_push
 801           (file_cache_slot::line_info (m_line_num,
 802                                        m_line_start_idx,
 803                                        line_end - m_data));
 804       else if (m_total_lines > line_record_size)
 805         {
 806           /* ... otherwise, we just scale total_lines down to
 807              (line_record_size lines.  */
 808           size_t n = (m_line_num * line_record_size) / m_total_lines;
 809           if (m_line_record.length () == 0
 810               || n >= m_line_record.length ())
 811             m_line_record.safe_push
 812               (file_cache_slot::line_info (m_line_num,
 813                                            m_line_start_idx,
 814                                            line_end - m_data));
 815         }
 816     }
 817
 818   /* Update m_line_start_idx so that it points to the next line to be
 819      read.  */
 820   if (next_line_start)
 821     m_line_start_idx = next_line_start - m_data;
 822   else
 823     /* We didn't find any terminal '\n'.  Let's consider that the end
 824        of line is the end of the data in the cache.  The next
 825        invocation of get_next_line will either read more data from the
 826        underlying file or return false early because we've reached the
 827        end of the file.  */
 828     m_line_start_idx = m_nb_read;
 829
 830   *line_len = len;
 831
 832   return true;
 833 }
 834
 835 /* Consume the next bytes coming from the cache (or from its
 836    underlying file if there are remaining unread bytes in the file)
 837    until we reach the next end-of-line (or end-of-file).  There is no
 838    copying from the cache involved.  Return TRUE upon successful
 839    completion.  */
 840
 841 bool
 842 file_cache_slot::goto_next_line ()
 843 {
 844   char *l;
 845   ssize_t len;
 846
 847   return get_next_line (&l, &len);
 848 }
 849
 850 /* Read an arbitrary line number LINE_NUM from the file cached in C.
 851    If the line was read successfully, *LINE points to the beginning
 852    of the line in the file cache and *LINE_LEN is the length of the
 853    line.  *LINE is not nul-terminated, but may contain zero bytes.
 854    *LINE is only valid until the next call of read_line_num.
 855    This function returns bool if a line was read.  */
 856
 857 bool
 858 file_cache_slot::read_line_num (size_t line_num,
 859                        char ** line, ssize_t *line_len)
 860 {
 861   gcc_assert (line_num > 0);
 862
 863   if (line_num <= m_line_num)
 864     {
 865       /* We've been asked to read lines that are before m_line_num.
 866          So lets use our line record (if it's not empty) to try to
 867          avoid re-reading the file from the beginning again.  */
 868
 869       if (m_line_record.is_empty ())
 870         {
 871           m_line_start_idx = 0;
 872           m_line_num = 0;
 873         }
 874       else
 875         {
 876           file_cache_slot::line_info *i = NULL;
 877           if (m_total_lines <= line_record_size)
 878             {
 879               /* In languages where the input file is not totally
 880                  preprocessed up front, the m_total_lines hint
 881                  can be smaller than the number of lines of the
 882                  file.  In that case, only the first
 883                  m_total_lines have been recorded.
 884
 885                  Otherwise, the first m_total_lines we've read have
 886                  their start/end recorded here.  */
 887               i = (line_num <= m_total_lines)
 888                 ? &m_line_record[line_num - 1]
 889                 : &m_line_record[m_total_lines - 1];
 890               gcc_assert (i->line_num <= line_num);
 891             }
 892           else
 893             {
 894               /*  So the file had more lines than our line record
 895                   size.  Thus the number of lines we've recorded has
 896                   been scaled down to line_record_size.  Let's
 897                   pick the start/end of the recorded line that is
 898                   closest to line_num.  */
 899               size_t n = (line_num <= m_total_lines)
 900                 ? line_num * line_record_size / m_total_lines
 901                 : m_line_record.length () - 1;
 902               if (n < m_line_record.length ())
 903                 {
 904                   i = &m_line_record[n];
 905                   gcc_assert (i->line_num <= line_num);
 906                 }
 907             }
 908
 909           if (i && i->line_num == line_num)
 910             {
 911               /* We have the start/end of the line.  */
 912               *line = m_data + i->start_pos;
 913               *line_len = i->end_pos - i->start_pos;
 914               return true;
 915             }
 916
 917           if (i)
 918             {
 919               m_line_start_idx = i->start_pos;
 920               m_line_num = i->line_num - 1;
 921             }
 922           else
 923             {
 924               m_line_start_idx = 0;
 925               m_line_num = 0;
 926             }
 927         }
 928     }
 929
 930   /*  Let's walk from line m_line_num up to line_num - 1, without
 931       copying any line.  */
 932   while (m_line_num < line_num - 1)
 933     if (!goto_next_line ())
 934       return false;
 935
 936   /* The line we want is the next one.  Let's read and copy it back to
 937      the caller.  */
 938   return get_next_line (line, line_len);
 939 }
 940
 941 /* Return the physical source line that corresponds to FILE_PATH/LINE.
 942    The line is not nul-terminated.  The returned pointer is only
 943    valid until the next call of location_get_source_line.
 944    Note that the line can contain several null characters,
 945    so the returned value's length has the actual length of the line.
 946    If the function fails, a NULL char_span is returned.  */
 947
 948 char_span
 949 file_cache::get_source_line (const char *file_path, int line)
 950 {
 951   char *buffer = NULL;
 952   ssize_t len;
 953
 954   if (line == 0)
 955     return char_span (NULL, 0);
 956
 957   if (file_path == NULL)
 958     return char_span (NULL, 0);
 959
 960   file_cache_slot *c = lookup_or_add_file (file_path);
 961   if (c == NULL)
 962     return char_span (NULL, 0);
 963
 964   bool read = c->read_line_num (line, &buffer, &len);
 965   if (!read)
 966     return char_span (NULL, 0);
 967
 968   return char_span (buffer, len);
 969 }
 970
 971 char_span
 972 location_get_source_line (const char *file_path, int line)
 973 {
 974   diagnostic_file_cache_init ();
 975   return global_dc->get_file_cache ()->get_source_line (file_path, line);
 976 }
 977
 978 /* Return a NUL-terminated copy of the source text between two locations, or
 979    NULL if the arguments are invalid.  The caller is responsible for freeing
 980    the return value.  */
 981
 982 char *
 983 get_source_text_between (location_t start, location_t end)
 984 {
 985   expanded_location expstart =
 986     expand_location_to_spelling_point (start, LOCATION_ASPECT_START);
 987   expanded_location expend =
 988     expand_location_to_spelling_point (end, LOCATION_ASPECT_FINISH);
 989
 990   /* If the locations are in different files or the end comes before the
 991      start, give up and return nothing.  */
 992   if (!expstart.file || !expend.file)
 993     return NULL;
 994   if (strcmp (expstart.file, expend.file) != 0)
 995     return NULL;
 996   if (expstart.line > expend.line)
 997     return NULL;
 998   if (expstart.line == expend.line
 999       && expstart.column > expend.column)
1000     return NULL;
1001   /* These aren't real column numbers, give up.  */
1002   if (expstart.column == 0 || expend.column == 0)
1003     return NULL;
1004
1005   /* For a single line we need to trim both edges.  */
1006   if (expstart.line == expend.line)
1007     {
1008       char_span line = location_get_source_line (expstart.file, expstart.line);
1009       if (line.length () < 1)
1010         return NULL;
1011       int s = expstart.column - 1;
1012       int len = expend.column - s;
1013       if (line.length () < (size_t)expend.column)
1014         return NULL;
1015       return line.subspan (s, len).xstrdup ();
1016     }
1017
1018   struct obstack buf_obstack;
1019   obstack_init (&buf_obstack);
1020
1021   /* Loop through all lines in the range and append each to buf; may trim
1022      parts of the start and end lines off depending on column values.  */
1023   for (int lnum = expstart.line; lnum <= expend.line; ++lnum)
1024     {
1025       char_span line = location_get_source_line (expstart.file, lnum);
1026       if (line.length () < 1 && (lnum != expstart.line && lnum != expend.line))
1027         continue;
1028
1029       /* For the first line in the range, only start at expstart.column */
1030       if (lnum == expstart.line)
1031         {
1032           unsigned off = expstart.column - 1;
1033           if (line.length () < off)
1034             return NULL;
1035           line = line.subspan (off, line.length() - off);
1036         }
1037       /* For the last line, don't go past expend.column */
1038       else if (lnum == expend.line)
1039         {
1040           if (line.length () < (size_t)expend.column)
1041             return NULL;
1042           line = line.subspan (0, expend.column);
1043         }
1044
1045       /* Combine spaces at the beginning of later lines.  */
1046       if (lnum > expstart.line)
1047         {
1048           unsigned off;
1049           for (off = 0; off < line.length(); ++off)
1050             if (line[off] != ' ' && line[off] != '\t')
1051               break;
1052           if (off > 0)
1053             {
1054               obstack_1grow (&buf_obstack, ' ');
1055               line = line.subspan (off, line.length() - off);
1056             }
1057         }
1058
1059       /* This does not include any trailing newlines.  */
1060       obstack_grow (&buf_obstack, line.get_buffer (), line.length ());
1061     }
1062
1063   /* NUL-terminate and finish the buf obstack.  */
1064   obstack_1grow (&buf_obstack, 0);
1065   const char *buf = (const char *) obstack_finish (&buf_obstack);
1066
1067   return xstrdup (buf);
1068 }
1069
1070
1071 char_span
1072 file_cache::get_source_file_content (const char *file_path)
1073 {
1074   file_cache_slot *c = lookup_or_add_file (file_path);
1075   if (c == nullptr)
1076     return char_span (nullptr, 0);
1077   return c->get_full_file_content ();
1078 }
1079
1080
1081 /* Get a borrowed char_span to the full content of FILE_PATH
1082    as decoded according to the input charset, encoded as UTF-8.  */
1083
1084 char_span
1085 get_source_file_content (const char *file_path)
1086 {
1087   diagnostic_file_cache_init ();
1088   return global_dc->get_file_cache ()->get_source_file_content (file_path);
1089 }
1090
1091 /* Determine if FILE_PATH missing a trailing newline on its final line.
1092    Only valid to call once all of the file has been loaded, by
1093    requesting a line number beyond the end of the file.  */
1094
1095 bool
1096 location_missing_trailing_newline (const char *file_path)
1097 {
1098   diagnostic_file_cache_init ();
1099
1100   file_cache_slot *c = global_dc->get_file_cache ()->lookup_or_add_file (file_path);
1101   if (c == NULL)
1102     return false;
1103
1104   return c->missing_trailing_newline_p ();
1105 }
1106
1107 /* Test if the location originates from the spelling location of a
1108    builtin-tokens.  That is, return TRUE if LOC is a (possibly
1109    virtual) location of a built-in token that appears in the expansion
1110    list of a macro.  Please note that this function also works on
1111    tokens that result from built-in tokens.  For instance, the
1112    function would return true if passed a token "4" that is the result
1113    of the expansion of the built-in __LINE__ macro.  */
1114 bool
1115 is_location_from_builtin_token (location_t loc)
1116 {
1117   const line_map_ordinary *map = NULL;
1118   loc = linemap_resolve_location (line_table, loc,
1119                                   LRK_SPELLING_LOCATION, &map);
1120   return loc == BUILTINS_LOCATION;
1121 }
1122
1123 /* Expand the source location LOC into a human readable location.  If
1124    LOC is virtual, it resolves to the expansion point of the involved
1125    macro.  If LOC resolves to a builtin location, the file name of the
1126    readable location is set to the string "<built-in>".  */
1127
1128 expanded_location
1129 expand_location (location_t loc)
1130 {
1131   return expand_location_1 (line_table, loc, /*expansion_point_p=*/true,
1132                             LOCATION_ASPECT_CARET);
1133 }
1134
1135 /* Expand the source location LOC into a human readable location.  If
1136    LOC is virtual, it resolves to the expansion location of the
1137    relevant macro.  If LOC resolves to a builtin location, the file
1138    name of the readable location is set to the string
1139    "<built-in>".  */
1140
1141 expanded_location
1142 expand_location_to_spelling_point (location_t loc,
1143                                    enum location_aspect aspect)
1144 {
1145   return expand_location_1 (line_table, loc, /*expansion_point_p=*/false,
1146                             aspect);
1147 }
1148
1149 /* The rich_location class within libcpp requires a way to expand
1150    location_t instances, and relies on the client code
1151    providing a symbol named
1152      linemap_client_expand_location_to_spelling_point
1153    to do this.
1154
1155    This is the implementation for libcommon.a (all host binaries),
1156    which simply calls into expand_location_1.  */
1157
1158 expanded_location
1159 linemap_client_expand_location_to_spelling_point (const line_maps *set,
1160                                                   location_t loc,
1161                                                   enum location_aspect aspect)
1162 {
1163   return expand_location_1 (set, loc, /*expansion_point_p=*/false, aspect);
1164 }
1165
1166
1167 /* If LOCATION is in a system header and if it is a virtual location
1168    for a token coming from the expansion of a macro, unwind it to
1169    the location of the expansion point of the macro.  If the expansion
1170    point is also in a system header return the original LOCATION.
1171    Otherwise, return the location of the expansion point.
1172
1173    This is used for instance when we want to emit diagnostics about a
1174    token that may be located in a macro that is itself defined in a
1175    system header, for example, for the NULL macro.  In such a case, if
1176    LOCATION were passed directly to diagnostic functions such as
1177    warning_at, the diagnostic would be suppressed (unless
1178    -Wsystem-headers).  */
1179
1180 location_t
1181 expansion_point_location_if_in_system_header (location_t location)
1182 {
1183   if (!in_system_header_at (location))
1184     return location;
1185
1186   location_t xloc = linemap_resolve_location (line_table, location,
1187                                               LRK_MACRO_EXPANSION_POINT,
1188                                               NULL);
1189   return in_system_header_at (xloc) ? location : xloc;
1190 }
1191
1192 /* If LOCATION is a virtual location for a token coming from the expansion
1193    of a macro, unwind to the location of the expansion point of the macro.  */
1194
1195 location_t
1196 expansion_point_location (location_t location)
1197 {
1198   return linemap_resolve_location (line_table, location,
1199                                    LRK_MACRO_EXPANSION_POINT, NULL);
1200 }
1201
1202 /* Construct a location with caret at CARET, ranging from START to
1203    FINISH.
1204
1205    For example, consider:
1206
1207                  11111111112
1208         12345678901234567890
1209      522
1210      523   return foo + bar;
1211                   ~~~~^~~~~
1212      524
1213
1214    The location's caret is at the "+", line 523 column 15, but starts
1215    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
1216    of "bar" at column 19.  */
1217
1218 location_t
1219 make_location (location_t caret, location_t start, location_t finish)
1220 {
1221   return line_table->make_location (caret, start, finish);
1222 }
1223
1224 /* Same as above, but taking a source range rather than two locations.  */
1225
1226 location_t
1227 make_location (location_t caret, source_range src_range)
1228 {
1229   location_t pure_loc = get_pure_location (caret);
1230   return line_table->get_or_create_combined_loc (pure_loc, src_range,
1231                                                  nullptr, 0);
1232 }
1233
1234 /* An expanded_location stores the column in byte units.  This function
1235    converts that column to display units.  That requires reading the associated
1236    source line in order to calculate the display width.  If that cannot be done
1237    for any reason, then returns the byte column as a fallback.  */
1238 int
1239 location_compute_display_column (expanded_location exploc,
1240                                  const cpp_char_column_policy &policy)
1241 {
1242   if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
1243     return exploc.column;
1244   char_span line = location_get_source_line (exploc.file, exploc.line);
1245   /* If line is NULL, this function returns exploc.column which is the
1246      desired fallback.  */
1247   return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
1248                                             exploc.column, policy);
1249 }
1250
1251 /* Dump statistics to stderr about the memory usage of the line_table
1252    set of line maps.  This also displays some statistics about macro
1253    expansion.  */
1254
1255 void
1256 dump_line_table_statistics (void)
1257 {
1258   struct linemap_stats s;
1259   long total_used_map_size,
1260     macro_maps_size,
1261     total_allocated_map_size;
1262
1263   memset (&s, 0, sizeof (s));
1264
1265   linemap_get_statistics (line_table, &s);
1266
1267   macro_maps_size = s.macro_maps_used_size
1268     + s.macro_maps_locations_size;
1269
1270   total_allocated_map_size = s.ordinary_maps_allocated_size
1271     + s.macro_maps_allocated_size
1272     + s.macro_maps_locations_size;
1273
1274   total_used_map_size = s.ordinary_maps_used_size
1275     + s.macro_maps_used_size
1276     + s.macro_maps_locations_size;
1277
1278   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
1279            s.num_expanded_macros);
1280   if (s.num_expanded_macros != 0)
1281     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
1282              s.num_macro_tokens / s.num_expanded_macros);
1283   fprintf (stderr,
1284            "\nLine Table allocations during the "
1285            "compilation process\n");
1286   fprintf (stderr, "Number of ordinary maps used:        " PRsa (5) "\n",
1287            SIZE_AMOUNT (s.num_ordinary_maps_used));
1288   fprintf (stderr, "Ordinary map used size:              " PRsa (5) "\n",
1289            SIZE_AMOUNT (s.ordinary_maps_used_size));
1290   fprintf (stderr, "Number of ordinary maps allocated:   " PRsa (5) "\n",
1291            SIZE_AMOUNT (s.num_ordinary_maps_allocated));
1292   fprintf (stderr, "Ordinary maps allocated size:        " PRsa (5) "\n",
1293            SIZE_AMOUNT (s.ordinary_maps_allocated_size));
1294   fprintf (stderr, "Number of macro maps used:           " PRsa (5) "\n",
1295            SIZE_AMOUNT (s.num_macro_maps_used));
1296   fprintf (stderr, "Macro maps used size:                " PRsa (5) "\n",
1297            SIZE_AMOUNT (s.macro_maps_used_size));
1298   fprintf (stderr, "Macro maps locations size:           " PRsa (5) "\n",
1299            SIZE_AMOUNT (s.macro_maps_locations_size));
1300   fprintf (stderr, "Macro maps size:                     " PRsa (5) "\n",
1301            SIZE_AMOUNT (macro_maps_size));
1302   fprintf (stderr, "Duplicated maps locations size:      " PRsa (5) "\n",
1303            SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
1304   fprintf (stderr, "Total allocated maps size:           " PRsa (5) "\n",
1305            SIZE_AMOUNT (total_allocated_map_size));
1306   fprintf (stderr, "Total used maps size:                " PRsa (5) "\n",
1307            SIZE_AMOUNT (total_used_map_size));
1308   fprintf (stderr, "Ad-hoc table size:                   " PRsa (5) "\n",
1309            SIZE_AMOUNT (s.adhoc_table_size));
1310   fprintf (stderr, "Ad-hoc table entries used:           " PRsa (5) "\n",
1311            SIZE_AMOUNT (s.adhoc_table_entries_used));
1312   fprintf (stderr, "optimized_ranges:                    " PRsa (5) "\n",
1313            SIZE_AMOUNT (line_table->m_num_optimized_ranges));
1314   fprintf (stderr, "unoptimized_ranges:                  " PRsa (5) "\n",
1315            SIZE_AMOUNT (line_table->m_num_unoptimized_ranges));
1316
1317   fprintf (stderr, "\n");
1318 }
1319
1320 /* Get location one beyond the final location in ordinary map IDX.  */
1321
1322 static location_t
1323 get_end_location (class line_maps *set, unsigned int idx)
1324 {
1325   if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1326     return set->highest_location;
1327
1328   struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1329   return MAP_START_LOCATION (next_map);
1330 }
1331
/* Helper function for write_digit_row.  Write to STREAM the decimal
   units digit of DIGIT, i.e. its value modulo 10.  The sign of DIGIT is
   ignored, so that a character in the range '0'..'9' is always emitted,
   even if DIGIT is negative.  */

static void
write_digit (FILE *stream, int digit)
{
  /* The remainder takes the sign of the dividend, so strip the sign to
     stay within '0'..'9'.  */
  int units = digit % 10;
  if (units < 0)
    units = -units;
  fputc ('0' + units, stream);
}
1339
1340 /* Helper function for dump_location_info.
1341    Write a row of numbers to STREAM, numbering a source line,
1342    giving the units, tens, hundreds etc of the column number.  */
1343
1344 static void
1345 write_digit_row (FILE *stream, int indent,
1346                  const line_map_ordinary *map,
1347                  location_t loc, int max_col, int divisor)
1348 {
1349   fprintf (stream, "%*c", indent, ' ');
1350   fprintf (stream, "|");
1351   for (int column = 1; column < max_col; column++)
1352     {
1353       location_t column_loc = loc + (column << map->m_range_bits);
1354       write_digit (stream, column_loc / divisor);
1355     }
1356   fprintf (stream, "\n");
1357 }
1358
1359 /* Write a half-closed (START) / half-open (END) interval of
1360    location_t to STREAM.  */
1361
1362 static void
1363 dump_location_range (FILE *stream,
1364                      location_t start, location_t end)
1365 {
1366   fprintf (stream,
1367            "  location_t interval: %u <= loc < %u\n",
1368            start, end);
1369 }
1370
1371 /* Write a labelled description of a half-closed (START) / half-open (END)
1372    interval of location_t to STREAM.  */
1373
1374 static void
1375 dump_labelled_location_range (FILE *stream,
1376                               const char *name,
1377                               location_t start, location_t end)
1378 {
1379   fprintf (stream, "%s\n", name);
1380   dump_location_range (stream, start, end);
1381   fprintf (stream, "\n");
1382 }
1383
1384 /* Write a visualization of the locations in the line_table to STREAM.  */
1385
1386 void
1387 dump_location_info (FILE *stream)
1388 {
1389   /* Visualize the reserved locations.  */
1390   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1391                                 0, RESERVED_LOCATION_COUNT);
1392
1393   /* Visualize the ordinary line_map instances, rendering the sources. */
1394   for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1395     {
1396       location_t end_location = get_end_location (line_table, idx);
1397       /* half-closed: doesn't include this one. */
1398
1399       const line_map_ordinary *map
1400         = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1401       fprintf (stream, "ORDINARY MAP: %i\n", idx);
1402       dump_location_range (stream,
1403                            MAP_START_LOCATION (map), end_location);
1404       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1405       fprintf (stream, "  starting at line: %i\n",
1406                ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1407       fprintf (stream, "  column and range bits: %i\n",
1408                map->m_column_and_range_bits);
1409       fprintf (stream, "  column bits: %i\n",
1410                map->m_column_and_range_bits - map->m_range_bits);
1411       fprintf (stream, "  range bits: %i\n",
1412                map->m_range_bits);
1413       const char * reason;
1414       switch (map->reason) {
1415       case LC_ENTER:
1416         reason = "LC_ENTER";
1417         break;
1418       case LC_LEAVE:
1419         reason = "LC_LEAVE";
1420         break;
1421       case LC_RENAME:
1422         reason = "LC_RENAME";
1423         break;
1424       case LC_RENAME_VERBATIM:
1425         reason = "LC_RENAME_VERBATIM";
1426         break;
1427       case LC_ENTER_MACRO:
1428         reason = "LC_RENAME_MACRO";
1429         break;
1430       default:
1431         reason = "Unknown";
1432       }
1433       fprintf (stream, "  reason: %d (%s)\n", map->reason, reason);
1434
1435       const line_map_ordinary *includer_map
1436         = linemap_included_from_linemap (line_table, map);
1437       fprintf (stream, "  included from location: %d",
1438                linemap_included_from (map));
1439       if (includer_map) {
1440         fprintf (stream, " (in ordinary map %d)",
1441                  int (includer_map - line_table->info_ordinary.maps));
1442       }
1443       fprintf (stream, "\n");
1444
1445       /* Render the span of source lines that this "map" covers.  */
1446       for (location_t loc = MAP_START_LOCATION (map);
1447            loc < end_location;
1448            loc += (1 << map->m_range_bits) )
1449         {
1450           gcc_assert (pure_location_p (line_table, loc) );
1451
1452           expanded_location exploc
1453             = linemap_expand_location (line_table, map, loc);
1454
1455           if (exploc.column == 0)
1456             {
1457               /* Beginning of a new source line: draw the line.  */
1458
1459               char_span line_text = location_get_source_line (exploc.file,
1460                                                               exploc.line);
1461               if (!line_text)
1462                 break;
1463               fprintf (stream,
1464                        "%s:%3i|loc:%5i|%.*s\n",
1465                        exploc.file, exploc.line,
1466                        loc,
1467                        (int)line_text.length (), line_text.get_buffer ());
1468
1469               /* "loc" is at column 0, which means "the whole line".
1470                  Render the locations *within* the line, by underlining
1471                  it, showing the location_t numeric values
1472                  at each column.  */
1473               size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1474               if (max_col > line_text.length ())
1475                 max_col = line_text.length () + 1;
1476
1477               int len_lnum = num_digits (exploc.line);
1478               if (len_lnum < 3)
1479                 len_lnum = 3;
1480               int len_loc = num_digits (loc);
1481               if (len_loc < 5)
1482                 len_loc = 5;
1483
1484               int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1485
1486               /* Thousands.  */
1487               if (end_location > 999)
1488                 write_digit_row (stream, indent, map, loc, max_col, 1000);
1489
1490               /* Hundreds.  */
1491               if (end_location > 99)
1492                 write_digit_row (stream, indent, map, loc, max_col, 100);
1493
1494               /* Tens.  */
1495               write_digit_row (stream, indent, map, loc, max_col, 10);
1496
1497               /* Units.  */
1498               write_digit_row (stream, indent, map, loc, max_col, 1);
1499             }
1500         }
1501       fprintf (stream, "\n");
1502     }
1503
1504   /* Visualize unallocated values.  */
1505   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1506                                 line_table->highest_location,
1507                                 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1508
1509   /* Visualize the macro line_map instances, rendering the sources. */
1510   for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1511     {
1512       /* Each macro map that is allocated owns location_t values
1513          that are *lower* that the one before them.
1514          Hence it's meaningful to view them either in order of ascending
1515          source locations, or in order of ascending macro map index.  */
1516       const bool ascending_location_ts = true;
1517       unsigned int idx = (ascending_location_ts
1518                           ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1519                           : i);
1520       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1521       fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1522                idx,
1523                linemap_map_get_macro_name (map),
1524                MACRO_MAP_NUM_MACRO_TOKENS (map));
1525       dump_location_range (stream,
1526                            map->start_location,
1527                            (map->start_location
1528                             + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1529       inform (map->get_expansion_point_location (),
1530               "expansion point is location %i",
1531               map->get_expansion_point_location ());
1532       fprintf (stream, "  map->start_location: %u\n",
1533                map->start_location);
1534
1535       fprintf (stream, "  macro_locations:\n");
1536       for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1537         {
1538           location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1539           location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1540
1541           /* linemap_add_macro_token encodes token numbers in an expansion
1542              by putting them after MAP_START_LOCATION. */
1543
1544           /* I'm typically seeing 4 uninitialized entries at the end of
1545              0xafafafaf.
1546              This appears to be due to macro.cc:replace_args
1547              adding 2 extra args for padding tokens; presumably there may
1548              be a leading and/or trailing padding token injected,
1549              each for 2 more location slots.
1550              This would explain there being up to 4 location_ts slots
1551              that may be uninitialized.  */
1552
1553           fprintf (stream, "    %u: %u, %u\n",
1554                    i,
1555                    x,
1556                    y);
1557           if (x == y)
1558             {
1559               if (x < MAP_START_LOCATION (map))
1560                 inform (x, "token %u has %<x-location == y-location == %u%>",
1561                         i, x);
1562               else
1563                 fprintf (stream,
1564                          "x-location == y-location == %u encodes token # %u\n",
1565                          x, x - MAP_START_LOCATION (map));
1566                 }
1567           else
1568             {
1569               inform (x, "token %u has %<x-location == %u%>", i, x);
1570               inform (x, "token %u has %<y-location == %u%>", i, y);
1571             }
1572         }
1573       fprintf (stream, "\n");
1574     }
1575
1576   /* It appears that MAX_LOCATION_T itself is never assigned to a
1577      macro map, presumably due to an off-by-one error somewhere
1578      between the logic in linemap_enter_macro and
1579      LINEMAPS_MACRO_LOWEST_LOCATION.  */
1580   dump_labelled_location_range (stream, "MAX_LOCATION_T",
1581                                 MAX_LOCATION_T,
1582                                 MAX_LOCATION_T + 1);
1583
1584   /* Visualize ad-hoc values.  */
1585   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1586                                 MAX_LOCATION_T + 1, UINT_MAX);
1587 }
1588
1589 /* string_concat's constructor.  */
1590
1591 string_concat::string_concat (int num, location_t *locs)
1592   : m_num (num)
1593 {
1594   m_locs = ggc_vec_alloc <location_t> (num);
1595   for (int i = 0; i < num; i++)
1596     m_locs[i] = locs[i];
1597 }
1598
1599 /* string_concat_db's constructor.  */
1600
1601 string_concat_db::string_concat_db ()
1602 {
1603   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1604 }
1605
1606 /* Record that a string concatenation occurred, covering NUM
1607    string literal tokens.  LOCS is an array of size NUM, containing the
1608    locations of the tokens.  A copy of LOCS is taken.  */
1609
1610 void
1611 string_concat_db::record_string_concatenation (int num, location_t *locs)
1612 {
1613   gcc_assert (num > 1);
1614   gcc_assert (locs);
1615
1616   location_t key_loc = get_key_loc (locs[0]);
1617   /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:
1618      any data now recorded under key 'key_loc' would be overwritten by a
1619      subsequent call with the same key 'key_loc'.  */
1620   if (RESERVED_LOCATION_P (key_loc))
1621     return;
1622
1623   string_concat *concat
1624     = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1625   m_table->put (key_loc, concat);
1626 }
1627
1628 /* Determine if LOC was the location of the initial token of a
1629    concatenation of string literal tokens.
1630    If so, *OUT_NUM is written to with the number of tokens, and
1631    *OUT_LOCS with the location of an array of locations of the
1632    tokens, and return true.  *OUT_LOCS is a borrowed pointer to
1633    storage owned by the string_concat_db.
1634    Otherwise, return false.  */
1635
1636 bool
1637 string_concat_db::get_string_concatenation (location_t loc,
1638                                             int *out_num,
1639                                             location_t **out_locs)
1640 {
1641   gcc_assert (out_num);
1642   gcc_assert (out_locs);
1643
1644   location_t key_loc = get_key_loc (loc);
1645   /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see
1646      discussion in 'string_concat_db::record_string_concatenation'.  */
1647   if (RESERVED_LOCATION_P (key_loc))
1648     return false;
1649
1650   string_concat **concat = m_table->get (key_loc);
1651   if (!concat)
1652     return false;
1653
1654   *out_num = (*concat)->m_num;
1655   *out_locs =(*concat)->m_locs;
1656   return true;
1657 }
1658
1659 /* Internal function.  Canonicalize LOC into a form suitable for
1660    use as a key within the database, stripping away macro expansion,
1661    ad-hoc information, and range information, using the location of
1662    the start of LOC within an ordinary linemap.  */
1663
1664 location_t
1665 string_concat_db::get_key_loc (location_t loc)
1666 {
1667   loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1668                                   NULL);
1669
1670   loc = get_range_from_loc (line_table, loc).m_start;
1671
1672   return loc;
1673 }
1674
1675 /* Helper class for use within get_substring_ranges_for_loc.
1676    An vec of cpp_string with responsibility for releasing all of the
1677    str->text for each str in the vector.  */
1678
1679 class auto_cpp_string_vec :  public auto_vec <cpp_string>
1680 {
1681  public:
1682   auto_cpp_string_vec (int alloc)
1683     : auto_vec <cpp_string> (alloc) {}
1684
1685   ~auto_cpp_string_vec ()
1686   {
1687     /* Clean up the copies within this vec.  */
1688     int i;
1689     cpp_string *str;
1690     FOR_EACH_VEC_ELT (*this, i, str)
1691       free (const_cast <unsigned char *> (str->text));
1692   }
1693 };
1694
/* Attempt to populate RANGES with source location information on the
   individual characters within the string literal found at STRLOC.
   If CONCATS is non-NULL, then any string literals that the token at
   STRLOC was concatenated with are also added to RANGES.

   PFILE must be non-NULL; its options are consulted below, and it is
   passed, together with TYPE, to cpp_interpret_string_ranges.

   Return NULL if successful, or an error message if any errors occurred (in
   which case RANGES may be only partially populated and should not
   be used).  The checks are performed in a fixed order, so the message
   returned identifies the first problem encountered.

   This is implemented by re-parsing the relevant source line(s).  */

static const char *
get_substring_ranges_for_loc (cpp_reader *pfile,
                              string_concat_db *concats,
                              location_t strloc,
                              enum cpp_ttype type,
                              cpp_substring_ranges &ranges)
{
  gcc_assert (pfile);

  if (strloc == UNKNOWN_LOCATION)
    return "unknown location";

  /* Reparsing the strings requires accurate location information.
     If -ftrack-macro-expansion has been overridden from its default
     of 2, then we might have a location of a macro expansion point,
     rather than the location of the literal itself.
     Avoid this by requiring that we have full macro expansion tracking
     for substring locations to be available.  */
  if (cpp_get_options (pfile)->track_macro_expansion != 2)
    return "track_macro_expansion != 2";

  /* If #line or # 44 "file"-style directives are present, then there's
     no guarantee that the line numbers we have can be used to locate
     the strings.  For example, we might have a .i file with # directives
     pointing back to lines within a .c file, but the .c file might
     have been edited since the .i file was created.
     In such a case, the safest course is to disable on-demand substring
     locations.  */
  if (line_table->seen_line_directive)
    return "seen line directive";

  /* If string concatenation has occurred at STRLOC, get the locations
     of all of the literal tokens making up the compound string.
     Otherwise, just use STRLOC.
     The result of the lookup is deliberately not checked.
     NOTE(review): this presumably relies on get_string_concatenation
     leaving NUM_LOCS and STRLOCS untouched when nothing is recorded for
     STRLOC -- confirm against its implementation.  */
  int num_locs = 1;
  location_t *strlocs = &strloc;
  if (concats)
    concats->get_string_concatenation (strloc, &num_locs, &strlocs);

  /* One re-read literal, and one location reader, per token.  */
  auto_cpp_string_vec strs (num_locs);
  auto_vec <cpp_string_location_reader> loc_readers (num_locs);
  for (int i = 0; i < num_locs; i++)
    {
      /* Get range of strloc.  We will use it to locate the start and finish
         of the literal token within the line.  */
      source_range src_range = get_range_from_loc (line_table, strlocs[i]);

      if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
        {
          /* If the string token was within a macro expansion, then we can
             cope with it for the simple case where we have a single token.
             Otherwise, bail out.  */
          if (src_range.m_start != src_range.m_finish)
            return "macro expansion";
        }
      else
        {
          if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
            /* If so, we can't reliably determine where the token started within
               its line.  */
            return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";

          if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
            /* If so, we can't reliably determine where the token finished
               within its line.  */
            return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
        }

      /* Expand both endpoints and insist that they describe a sane span
         within a single line of a single file.  */
      expanded_location start
        = expand_location_to_spelling_point (src_range.m_start,
                                             LOCATION_ASPECT_START);
      expanded_location finish
        = expand_location_to_spelling_point (src_range.m_finish,
                                             LOCATION_ASPECT_FINISH);
      if (start.file != finish.file)
        return "range endpoints are in different files";
      if (start.line != finish.line)
        return "range endpoints are on different lines";
      if (start.column > finish.column)
        return "range endpoints are reversed";

      char_span line = location_get_source_line (start.file, start.line);
      if (!line)
        return "unable to read source line";

      /* Determine the location of the literal (including quotes
         and leading prefix chars, such as the 'u' in a u""
         token).  The finish column is inclusive, hence the "+ 1".  */
      size_t literal_length = finish.column - start.column + 1;

      /* Ensure that we don't crash if we got the wrong location.
         Columns are 1-based, whereas the offset into LINE is 0-based.  */
      if (start.column < 1)
        return "zero start column";
      if (line.length () < (start.column - 1 + literal_length))
        return "line is not wide enough";

      char_span literal = line.subspan (start.column - 1, literal_length);

      cpp_string from;
      from.len = literal_length;
      /* Make a copy of the literal, to avoid having to rely on
         the lifetime of the copy of the line within the cache.
         This will be released by the auto_cpp_string_vec dtor.  */
      from.text = (unsigned char *)literal.xstrdup ();
      strs.safe_push (from);

      /* For very long lines, a new linemap could have started
         halfway through the token.
         Ensure that the loc_reader uses the linemap of the
         *end* of the token for its start location.  */
      const line_map_ordinary *start_ord_map;
      linemap_resolve_location (line_table, src_range.m_start,
                                LRK_SPELLING_LOCATION, &start_ord_map);
      const line_map_ordinary *final_ord_map;
      linemap_resolve_location (line_table, src_range.m_finish,
                                LRK_SPELLING_LOCATION, &final_ord_map);
      if (start_ord_map == NULL || final_ord_map == NULL)
        return "failed to get ordinary maps";
      /* Bulletproofing.  We ought to only have different ordinary maps
         for start vs finish due to line-length jumps.  */
      if (start_ord_map != final_ord_map
          && start_ord_map->to_file != final_ord_map->to_file)
        return "start and finish are spelled in different ordinary maps";
      /* The file from linemap_resolve_location ought to match that from
         expand_location_to_spelling_point.  */
      if (start_ord_map->to_file != start.file)
        return "mismatching file after resolving linemap";

      /* Re-express the start of the token as a location within the
         ordinary map of the end of the token.  */
      location_t start_loc
        = linemap_position_for_line_and_column (line_table, final_ord_map,
                                                start.line, start.column);

      cpp_string_location_reader loc_reader (start_loc, line_table);
      loc_readers.safe_push (loc_reader);
    }

  /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
  const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
                                                 loc_readers.address (),
                                                 num_locs, &ranges, type);
  if (err)
    return err;

  /* Success: "ranges" should now contain information on the string.  */
  return NULL;
}
1852
1853 /* Attempt to populate *OUT_LOC with source location information on the
1854    given characters within the string literal found at STRLOC.
1855    CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1856    character set.
1857
1858    For example, given CARET_IDX = 4, START_IDX = 3, END_IDX  = 7
1859    and string literal "012345\n789"
1860    *OUT_LOC is written to with:
1861      "012345\n789"
1862          ~^~~~~
1863
1864    If CONCATS is non-NULL, then any string literals that the token at
1865    STRLOC was concatenated with are also considered.
1866
1867    This is implemented by re-parsing the relevant source line(s).
1868
1869    Return NULL if successful, or an error message if any errors occurred.
1870    Error messages are intended for GCC developers (to help debugging) rather
1871    than for end-users.  */
1872
1873 const char *
1874 get_location_within_string (cpp_reader *pfile,
1875                             string_concat_db *concats,
1876                             location_t strloc,
1877                             enum cpp_ttype type,
1878                             int caret_idx, int start_idx, int end_idx,
1879                             location_t *out_loc)
1880 {
1881   gcc_checking_assert (caret_idx >= 0);
1882   gcc_checking_assert (start_idx >= 0);
1883   gcc_checking_assert (end_idx >= 0);
1884   gcc_assert (out_loc);
1885
1886   cpp_substring_ranges ranges;
1887   const char *err
1888     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1889   if (err)
1890     return err;
1891
1892   if (caret_idx >= ranges.get_num_ranges ())
1893     return "caret_idx out of range";
1894   if (start_idx >= ranges.get_num_ranges ())
1895     return "start_idx out of range";
1896   if (end_idx >= ranges.get_num_ranges ())
1897     return "end_idx out of range";
1898
1899   *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1900                             ranges.get_range (start_idx).m_start,
1901                             ranges.get_range (end_idx).m_finish);
1902   return NULL;
1903 }
1904
1905 /* Associate the DISCRIMINATOR with LOCUS, and return a new locus. */
1906
1907 location_t
1908 location_with_discriminator (location_t locus, int discriminator)
1909 {
1910   tree block = LOCATION_BLOCK (locus);
1911   source_range src_range = get_range_from_loc (line_table, locus);
1912   locus = get_pure_location (locus);
1913
1914   if (locus == UNKNOWN_LOCATION)
1915     return locus;
1916
1917   return line_table->get_or_create_combined_loc (locus, src_range, block,
1918                                                  discriminator);
1919 }
1920
1921 /* Return TRUE if LOCUS represents a location with a discriminator.  */
1922
1923 bool
1924 has_discriminator (location_t locus)
1925 {
1926   return get_discriminator_from_loc (locus) != 0;
1927 }
1928
1929 /* Return the discriminator for LOCUS.  */
1930
1931 int
1932 get_discriminator_from_loc (location_t locus)
1933 {
1934   return get_discriminator_from_loc (line_table, locus);
1935 }
1936
1937 #if CHECKING_P
1938
1939 namespace selftest {
1940
1941 /* Selftests of location handling.  */
1942
1943 /* Attempt to populate *OUT_RANGE with source location information on the
1944    given character within the string literal found at STRLOC.
1945    CHAR_IDX refers to an offset within the execution character set.
1946    If CONCATS is non-NULL, then any string literals that the token at
1947    STRLOC was concatenated with are also considered.
1948
1949    This is implemented by re-parsing the relevant source line(s).
1950
1951    Return NULL if successful, or an error message if any errors occurred.
1952    Error messages are intended for GCC developers (to help debugging) rather
1953    than for end-users.  */
1954
1955 static const char *
1956 get_source_range_for_char (cpp_reader *pfile,
1957                            string_concat_db *concats,
1958                            location_t strloc,
1959                            enum cpp_ttype type,
1960                            int char_idx,
1961                            source_range *out_range)
1962 {
1963   gcc_checking_assert (char_idx >= 0);
1964   gcc_assert (out_range);
1965
1966   cpp_substring_ranges ranges;
1967   const char *err
1968     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1969   if (err)
1970     return err;
1971
1972   if (char_idx >= ranges.get_num_ranges ())
1973     return "char_idx out of range";
1974
1975   *out_range = ranges.get_range (char_idx);
1976   return NULL;
1977 }
1978
1979 /* As get_source_range_for_char, but write to *OUT the number
1980    of ranges that are available.  */
1981
1982 static const char *
1983 get_num_source_ranges_for_substring (cpp_reader *pfile,
1984                                      string_concat_db *concats,
1985                                      location_t strloc,
1986                                      enum cpp_ttype type,
1987                                      int *out)
1988 {
1989   gcc_assert (out);
1990
1991   cpp_substring_ranges ranges;
1992   const char *err
1993     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1994
1995   if (err)
1996     return err;
1997
1998   *out = ranges.get_num_ranges ();
1999   return NULL;
2000 }
2001
2002 /* Selftests of location handling.  */
2003
2004 /* Verify that compare() on linenum_type handles comparisons over the full
2005    range of the type.  */
2006
2007 static void
2008 test_linenum_comparisons ()
2009 {
2010   linenum_type min_line (0);
2011   linenum_type max_line (0xffffffff);
2012   ASSERT_EQ (0, compare (min_line, min_line));
2013   ASSERT_EQ (0, compare (max_line, max_line));
2014
2015   ASSERT_GT (compare (max_line, min_line), 0);
2016   ASSERT_LT (compare (min_line, max_line), 0);
2017 }
2018
2019 /* Helper function for verifying location data: when location_t
2020    values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
2021    as having column 0.  */
2022
2023 static bool
2024 should_have_column_data_p (location_t loc)
2025 {
2026   if (IS_ADHOC_LOC (loc))
2027     loc = get_location_from_adhoc_loc (line_table, loc);
2028   if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
2029     return false;
2030   return true;
2031 }
2032
2033 /* Selftest for should_have_column_data_p.  */
2034
2035 static void
2036 test_should_have_column_data_p ()
2037 {
2038   ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
2039   ASSERT_TRUE
2040     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
2041   ASSERT_FALSE
2042     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
2043 }
2044
2045 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
2046    on LOC.  */
2047
2048 static void
2049 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
2050               location_t loc)
2051 {
2052   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
2053   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
2054   /* If location_t values are sufficiently high, then column numbers
2055      will be unavailable and LOCATION_COLUMN (loc) will be 0.
2056      When close to the threshold, column numbers *may* be present: if
2057      the final linemap before the threshold contains a line that straddles
2058      the threshold, locations in that line have column information.  */
2059   if (should_have_column_data_p (loc))
2060     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
2061 }
2062
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests in a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
   and others use zero
   - the fallback modes within line-map.cc: there are various threshold
   values for location_t beyond which line-map.cc changes behavior
   (disabling of the range-packing optimization, disabling of
   column-tracking).  We can exercise these by starting the line_table
   at interesting values at or near these thresholds.

   The following class describes one particular case within our test
   matrix.  */

class line_table_case
{
public:
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* The value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the value at which to start the line_table's
     highest_location and highest_line.  */
  int m_base_location;
};
2090
2091 /* Constructor.  Store the old value of line_table, and create a new
2092    one, using sane defaults.  */
2093
2094 line_table_test::line_table_test ()
2095 {
2096   gcc_assert (saved_line_table == NULL);
2097   saved_line_table = line_table;
2098   line_table = ggc_alloc<line_maps> ();
2099   linemap_init (line_table, BUILTINS_LOCATION);
2100   gcc_assert (saved_line_table->m_reallocator);
2101   line_table->m_reallocator = saved_line_table->m_reallocator;
2102   gcc_assert (saved_line_table->m_round_alloc_size);
2103   line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
2104   line_table->default_range_bits = 0;
2105 }
2106
2107 /* Constructor.  Store the old value of line_table, and create a new
2108    one, using the sitation described in CASE_.  */
2109
2110 line_table_test::line_table_test (const line_table_case &case_)
2111 {
2112   gcc_assert (saved_line_table == NULL);
2113   saved_line_table = line_table;
2114   line_table = ggc_alloc<line_maps> ();
2115   linemap_init (line_table, BUILTINS_LOCATION);
2116   gcc_assert (saved_line_table->m_reallocator);
2117   line_table->m_reallocator = saved_line_table->m_reallocator;
2118   gcc_assert (saved_line_table->m_round_alloc_size);
2119   line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
2120   line_table->default_range_bits = case_.m_default_range_bits;
2121   if (case_.m_base_location)
2122     {
2123       line_table->highest_location = case_.m_base_location;
2124       line_table->highest_line = case_.m_base_location;
2125     }
2126 }
2127
2128 /* Destructor.  Restore the old value of line_table.  */
2129
2130 line_table_test::~line_table_test ()
2131 {
2132   gcc_assert (saved_line_table != NULL);
2133   line_table = saved_line_table;
2134   saved_line_table = NULL;
2135 }
2136
/* Verify basic operation of ordinary linemaps: build locations for two
   files within a temporary line_table set up as described by CASE_,
   then check that file, line and column can be recovered from them,
   and that make_location round-trips a caret/start/finish triple.  */

static void
test_accessing_ordinary_linemaps (const line_table_case &case_)
{
  /* Swap in a fresh line_table for the duration of this function.  */
  line_table_test ltt (case_);

  /* Build a simple linemap describing some locations. */
  linemap_add (line_table, LC_ENTER, false, "foo.c", 0);

  linemap_line_start (line_table, 1, 100);
  location_t loc_a = linemap_position_for_column (line_table, 1);
  location_t loc_b = linemap_position_for_column (line_table, 23);

  linemap_line_start (line_table, 2, 100);
  location_t loc_c = linemap_position_for_column (line_table, 1);
  location_t loc_d = linemap_position_for_column (line_table, 17);

  /* Example of a very long line.  */
  linemap_line_start (line_table, 3, 2000);
  location_t loc_e = linemap_position_for_column (line_table, 700);

  /* Transitioning back to a short line.  */
  linemap_line_start (line_table, 4, 0);
  location_t loc_back_to_short = linemap_position_for_column (line_table, 100);

  if (should_have_column_data_p (loc_back_to_short))
    {
      /* Verify that we switched to short lines in the linemap.
         NOTE(review): the difference checked here is presumably the
         number of bits available for the column -- confirm against
         line-map.h.  */
      line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
      ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
    }

  /* Example of a line that will eventually be seen to be longer
     than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
     below that.  */
  linemap_line_start (line_table, 5, 2000);

  location_t loc_start_of_very_long_line
    = linemap_position_for_column (line_table, 2000);
  location_t loc_too_wide
    = linemap_position_for_column (line_table, 4097);
  location_t loc_too_wide_2
    = linemap_position_for_column (line_table, 4098);

  /* ...and back to a sane line length.  */
  linemap_line_start (line_table, 6, 100);
  location_t loc_sane_again = linemap_position_for_column (line_table, 10);

  linemap_add (line_table, LC_LEAVE, false, NULL, 0);

  /* Multiple files.  */
  linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
  linemap_line_start (line_table, 1, 200);
  location_t loc_f = linemap_position_for_column (line_table, 150);
  linemap_add (line_table, LC_LEAVE, false, NULL, 0);

  /* Verify that we can recover the location info.  assert_loceq skips
     the column check for locations that are too high to have column
     data, so these hold for every CASE_.  */
  assert_loceq ("foo.c", 1, 1, loc_a);
  assert_loceq ("foo.c", 1, 23, loc_b);
  assert_loceq ("foo.c", 2, 1, loc_c);
  assert_loceq ("foo.c", 2, 17, loc_d);
  assert_loceq ("foo.c", 3, 700, loc_e);
  assert_loceq ("foo.c", 4, 100, loc_back_to_short);

  /* In the very wide line, the initial location should be fully tracked.  */
  assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
  /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
     be disabled.  */
  assert_loceq ("foo.c", 5, 0, loc_too_wide);
  assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
  /*...and column-tracking should be re-enabled for subsequent lines.  */
  assert_loceq ("foo.c", 6, 10, loc_sane_again);

  assert_loceq ("bar.c", 1, 150, loc_f);

  ASSERT_FALSE (is_location_from_builtin_token (loc_a));
  ASSERT_TRUE (pure_location_p (line_table, loc_a));

  /* Verify using make_location to build a range, and extracting data
     back from it.  The caret is loc_c; the range runs from loc_b to
     loc_d.  */
  location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
  ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
  ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
  source_range src_range = get_range_from_loc (line_table, range_c_b_d);
  ASSERT_EQ (loc_b, src_range.m_start);
  ASSERT_EQ (loc_d, src_range.m_finish);
}
2225
2226 /* Verify various properties of UNKNOWN_LOCATION.  */
2227
2228 static void
2229 test_unknown_location ()
2230 {
2231   ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
2232   ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
2233   ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
2234 }
2235
2236 /* Verify various properties of BUILTINS_LOCATION.  */
2237
2238 static void
2239 test_builtins ()
2240 {
2241   assert_loceq (special_fname_builtin (), 0, 0, BUILTINS_LOCATION);
2242   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
2243 }
2244
2245 /* Regression test for make_location.
2246    Ensure that we use pure locations for the start/finish of the range,
2247    rather than storing a packed or ad-hoc range as the start/finish.  */
2248
2249 static void
2250 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
2251 {
2252   /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
2253      with C++ frontend.
2254      ....................0000000001111111111222.
2255      ....................1234567890123456789012.  */
2256   const char *content = "     r += !aaa == bbb;\n";
2257   temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
2258   line_table_test ltt (case_);
2259   linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
2260
2261   const location_t c11 = linemap_position_for_column (line_table, 11);
2262   const location_t c12 = linemap_position_for_column (line_table, 12);
2263   const location_t c13 = linemap_position_for_column (line_table, 13);
2264   const location_t c14 = linemap_position_for_column (line_table, 14);
2265   const location_t c21 = linemap_position_for_column (line_table, 21);
2266
2267   if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
2268     return;
2269
2270   /* Use column 13 for the caret location, arbitrarily, to verify that we
2271      handle start != caret.  */
2272   const location_t aaa = make_location (c13, c12, c14);
2273   ASSERT_EQ (c13, get_pure_location (aaa));
2274   ASSERT_EQ (c12, get_start (aaa));
2275   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
2276   ASSERT_EQ (c14, get_finish (aaa));
2277   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
2278
2279   /* Make a location using a location with a range as the start-point.  */
2280   const location_t not_aaa = make_location (c11, aaa, c14);
2281   ASSERT_EQ (c11, get_pure_location (not_aaa));
2282   /* It should use the start location of the range, not store the range
2283      itself.  */
2284   ASSERT_EQ (c12, get_start (not_aaa));
2285   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
2286   ASSERT_EQ (c14, get_finish (not_aaa));
2287   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
2288
2289   /* Similarly, make a location with a range as the end-point.  */
2290   const location_t aaa_eq_bbb = make_location (c12, c12, c21);
2291   ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
2292   ASSERT_EQ (c12, get_start (aaa_eq_bbb));
2293   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
2294   ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
2295   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
2296   const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
2297   /* It should use the finish location of the range, not store the range
2298      itself.  */
2299   ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
2300   ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
2301   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
2302   ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
2303   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
2304 }
2305
2306 /* Verify reading of input files (e.g. for caret-based diagnostics).  */
2307
2308 static void
2309 test_reading_source_line ()
2310 {
2311   /* Create a tempfile and write some text to it.  */
2312   temp_source_file tmp (SELFTEST_LOCATION, ".txt",
2313                         "01234567890123456789\n"
2314                         "This is the test text\n"
2315                         "This is the 3rd line");
2316
2317   /* Read back a specific line from the tempfile.  */
2318   char_span source_line = location_get_source_line (tmp.get_filename (), 3);
2319   ASSERT_TRUE (source_line);
2320   ASSERT_TRUE (source_line.get_buffer () != NULL);
2321   ASSERT_EQ (20, source_line.length ());
2322   ASSERT_TRUE (!strncmp ("This is the 3rd line",
2323                          source_line.get_buffer (), source_line.length ()));
2324
2325   source_line = location_get_source_line (tmp.get_filename (), 2);
2326   ASSERT_TRUE (source_line);
2327   ASSERT_TRUE (source_line.get_buffer () != NULL);
2328   ASSERT_EQ (21, source_line.length ());
2329   ASSERT_TRUE (!strncmp ("This is the test text",
2330                          source_line.get_buffer (), source_line.length ()));
2331
2332   source_line = location_get_source_line (tmp.get_filename (), 4);
2333   ASSERT_FALSE (source_line);
2334   ASSERT_TRUE (source_line.get_buffer () == NULL);
2335 }
2336
2337 /* Tests of lexing.  */
2338
/* Verify that token TOK from PARSER has cpp_token_as_text
   equal to EXPECTED_TEXT.
   The text is compared via ASSERT_STREQ, so a mismatch is reported
   at the line where this macro is used.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));    \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);           \
  SELFTEST_END_STMT
2347
2348 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
2349    and ranges from EXP_START_COL to EXP_FINISH_COL.
2350    Use LOC as the effective location of the selftest.  */
2351
2352 static void
2353 assert_token_loc_eq (const location &loc,
2354                      const cpp_token *tok,
2355                      const char *exp_filename, int exp_linenum,
2356                      int exp_start_col, int exp_finish_col)
2357 {
2358   location_t tok_loc = tok->src_loc;
2359   ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
2360   ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
2361
2362   /* If location_t values are sufficiently high, then column numbers
2363      will be unavailable.  */
2364   if (!should_have_column_data_p (tok_loc))
2365     return;
2366
2367   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2368   source_range tok_range = get_range_from_loc (line_table, tok_loc);
2369   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2370   ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
2371 }
2372
/* Use assert_token_loc_eq to verify the TOK->src_loc, using
   SELFTEST_LOCATION as the effective location of the selftest, so that
   a failure is reported at the line where this macro is used.
   TOK should be within EXP_FILENAME at EXP_LINENUM, ranging from
   EXP_START_COL to EXP_FINISH_COL.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
                            EXP_START_COL, EXP_FINISH_COL) \
  assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
                       (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
2380
2381 /* Test of lexing a file using libcpp, verifying tokens and their
2382    location information.  */
2383
2384 static void
2385 test_lexer (const line_table_case &case_)
2386 {
2387   /* Create a tempfile and write some text to it.  */
2388   const char *content =
2389     /*00000000011111111112222222222333333.3333444444444.455555555556
2390       12345678901234567890123456789012345.6789012345678.901234567890.  */
2391     ("test_name /* c-style comment */\n"
2392      "                                  \"test literal\"\n"
2393      " // test c++-style comment\n"
2394      "   42\n");
2395   temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2396
2397   line_table_test ltt (case_);
2398
2399   cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2400
2401   const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2402   ASSERT_NE (fname, NULL);
2403
2404   /* Verify that we get the expected tokens back, with the correct
2405      location information.  */
2406
2407   location_t loc;
2408   const cpp_token *tok;
2409   tok = cpp_get_token_with_location (parser, &loc);
2410   ASSERT_NE (tok, NULL);
2411   ASSERT_EQ (tok->type, CPP_NAME);
2412   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2413   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2414
2415   tok = cpp_get_token_with_location (parser, &loc);
2416   ASSERT_NE (tok, NULL);
2417   ASSERT_EQ (tok->type, CPP_STRING);
2418   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2419   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2420
2421   tok = cpp_get_token_with_location (parser, &loc);
2422   ASSERT_NE (tok, NULL);
2423   ASSERT_EQ (tok->type, CPP_NUMBER);
2424   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2425   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2426
2427   tok = cpp_get_token_with_location (parser, &loc);
2428   ASSERT_NE (tok, NULL);
2429   ASSERT_EQ (tok->type, CPP_EOF);
2430
2431   cpp_finish (parser, NULL);
2432   cpp_destroy (parser);
2433 }
2434
/* Forward decls.  */

class lexer_test;
class lexer_test_options;

/* An abstract base class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor, and is
   passed the lexer_test being constructed.

   The destructor is virtual so that an options object can safely be
   destroyed via a pointer to this base class.  */

class lexer_test_options
{
 public:
  virtual ~lexer_test_options () {}

  virtual void apply (lexer_test &) = 0;
};
2448
2449 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2450    in its dtor.
2451
2452    This is needed by struct lexer_test to ensure that the cleanup of the
2453    cpp_reader happens *after* the cleanup of the temp_source_file.  */
2454
2455 class cpp_reader_ptr
2456 {
2457  public:
2458   cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2459
2460   ~cpp_reader_ptr ()
2461   {
2462     cpp_finish (m_ptr, NULL);
2463     cpp_destroy (m_ptr);
2464   }
2465
2466   operator cpp_reader * () const { return m_ptr; }
2467
2468  private:
2469   cpp_reader *m_ptr;
2470 };
2471
/* A class for writing lexer tests.  It bundles together the objects
   that such a test needs: a temporary line_table, a cpp_reader, a
   temporary source file, and a string_concat_db.
   The ctor and dtor are defined out of line.  */

class lexer_test
{
public:
  /* CASE_ describes how to set up the line_table and CONTENT is the
     source text.  NOTE(review): OPTIONS is presumably allowed to be
     NULL when no options are wanted -- confirm against the ctor.  */
  lexer_test (const line_table_case &case_, const char *content,
              lexer_test_options *options);
  ~lexer_test ();

  /* NOTE(review): presumably fetches the next token from m_parser --
     confirm against the definition.  */
  const cpp_token *get_token ();

  /* The ordering of these fields matters.
     The line_table_test must be first, since the cpp_reader_ptr
     uses it.
     The cpp_reader must be cleaned up *after* the temp_source_file
     since the filenames in input.cc's input cache are owned by the
     cpp_reader; in particular, when ~temp_source_file evicts the
     filename the filenames must still be alive.
     Members are destroyed in the reverse of their declaration order,
     so declaring m_parser before m_tempfile gives the required
     cleanup order.  */
  line_table_test m_ltt;
  cpp_reader_ptr m_parser;
  temp_source_file m_tempfile;
  string_concat_db m_concats;
  /* NOTE(review): presumably controls whether the dtor checks that the
     next token is CPP_EOF -- confirm against ~lexer_test.  */
  bool m_implicitly_expect_EOF;
};
2496
2497 /* Use an EBCDIC encoding for the execution charset, specifically
2498    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2499
2500    This exercises iconv integration within libcpp.
2501    Not every build of iconv supports the given charset,
2502    so we need to flag this error and handle it gracefully.  */
2503
2504 class ebcdic_execution_charset : public lexer_test_options
2505 {
2506  public:
2507   ebcdic_execution_charset () : m_num_iconv_errors (0)
2508     {
2509       gcc_assert (s_singleton == NULL);
2510       s_singleton = this;
2511     }
2512   ~ebcdic_execution_charset ()
2513     {
2514       gcc_assert (s_singleton == this);
2515       s_singleton = NULL;
2516     }
2517
2518   void apply (lexer_test &test) final override
2519   {
2520     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2521     cpp_opts->narrow_charset = "IBM1047";
2522
2523     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2524     callbacks->diagnostic = on_diagnostic;
2525   }
2526
2527   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2528                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2529                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2530                              rich_location *richloc ATTRIBUTE_UNUSED,
2531                              const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2532     ATTRIBUTE_FPTR_PRINTF(5,0)
2533   {
2534     gcc_assert (s_singleton);
2535     /* Avoid exgettext from picking this up, it is translated in libcpp.  */
2536     const char *msg = "conversion from %s to %s not supported by iconv";
2537 #ifdef ENABLE_NLS
2538     msg = dgettext ("cpplib", msg);
2539 #endif
2540     /* Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc
2541        when the local iconv build doesn't support the conversion.  */
2542     if (strcmp (msgid, msg) == 0)
2543       {
2544         s_singleton->m_num_iconv_errors++;
2545         return true;
2546       }
2547
2548     /* Otherwise, we have an unexpected error.  */
2549     abort ();
2550   }
2551
2552   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2553
2554  private:
2555   static ebcdic_execution_charset *s_singleton;
2556   int m_num_iconv_errors;
2557 };
2558
2559 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2560
2561 /* A lexer_test_options subclass that records a list of diagnostic
2562    messages emitted by the lexer.  */
2563
2564 class lexer_diagnostic_sink : public lexer_test_options
2565 {
2566  public:
2567   lexer_diagnostic_sink ()
2568   {
2569     gcc_assert (s_singleton == NULL);
2570     s_singleton = this;
2571   }
2572   ~lexer_diagnostic_sink ()
2573   {
2574     gcc_assert (s_singleton == this);
2575     s_singleton = NULL;
2576
2577     int i;
2578     char *str;
2579     FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2580       free (str);
2581   }
2582
2583   void apply (lexer_test &test) final override
2584   {
2585     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2586     callbacks->diagnostic = on_diagnostic;
2587   }
2588
2589   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2590                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2591                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2592                              rich_location *richloc ATTRIBUTE_UNUSED,
2593                              const char *msgid, va_list *ap)
2594     ATTRIBUTE_FPTR_PRINTF(5,0)
2595   {
2596     char *msg = xvasprintf (msgid, *ap);
2597     s_singleton->m_diagnostics.safe_push (msg);
2598     return true;
2599   }
2600
2601   auto_vec<char *> m_diagnostics;
2602
2603  private:
2604   static lexer_diagnostic_sink *s_singleton;
2605 };
2606
2607 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2608
2609 /* Constructor.  Override line_table with a new instance based on CASE_,
2610    and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
2611    start parsing the tempfile.  */
2612
2613 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2614                         lexer_test_options *options)
2615 : m_ltt (case_),
2616   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2617   /* Create a tempfile and write the text to it.  */
2618   m_tempfile (SELFTEST_LOCATION, ".c", content),
2619   m_concats (),
2620   m_implicitly_expect_EOF (true)
2621 {
2622   if (options)
2623     options->apply (*this);
2624
2625   cpp_init_iconv (m_parser);
2626
2627   /* Parse the file.  */
2628   const char *fname = cpp_read_main_file (m_parser,
2629                                           m_tempfile.get_filename ());
2630   ASSERT_NE (fname, NULL);
2631 }
2632
2633 /* Destructor.  By default, verify that the next token in m_parser is EOF.  */
2634
2635 lexer_test::~lexer_test ()
2636 {
2637   location_t loc;
2638   const cpp_token *tok;
2639
2640   if (m_implicitly_expect_EOF)
2641     {
2642       tok = cpp_get_token_with_location (m_parser, &loc);
2643       ASSERT_NE (tok, NULL);
2644       ASSERT_EQ (tok->type, CPP_EOF);
2645     }
2646 }
2647
2648 /* Get the next token from m_parser.  */
2649
2650 const cpp_token *
2651 lexer_test::get_token ()
2652 {
2653   location_t loc;
2654   const cpp_token *tok;
2655
2656   tok = cpp_get_token_with_location (m_parser, &loc);
2657   ASSERT_NE (tok, NULL);
2658   return tok;
2659 }
2660
2661 /* Verify that locations within string literals are correctly handled.  */
2662
2663 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2664    using the string concatenation database for TEST.
2665
2666    Assert that the character at index IDX is on EXPECTED_LINE,
2667    and that it begins at column EXPECTED_START_COL and ends at
2668    EXPECTED_FINISH_COL (unless the locations are beyond
2669    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2670    columns).  */
2671
2672 static void
2673 assert_char_at_range (const location &loc,
2674                       lexer_test& test,
2675                       location_t strloc, enum cpp_ttype type, int idx,
2676                       int expected_line, int expected_start_col,
2677                       int expected_finish_col)
2678 {
2679   cpp_reader *pfile = test.m_parser;
2680   string_concat_db *concats = &test.m_concats;
2681
2682   source_range actual_range = source_range();
2683   const char *err
2684     = get_source_range_for_char (pfile, concats, strloc, type, idx,
2685                                  &actual_range);
2686   if (should_have_column_data_p (strloc))
2687     ASSERT_EQ_AT (loc, NULL, err);
2688   else
2689     {
2690       ASSERT_STREQ_AT (loc,
2691                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2692                        err);
2693       return;
2694     }
2695
2696   int actual_start_line = LOCATION_LINE (actual_range.m_start);
2697   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2698   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2699   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2700
2701   if (should_have_column_data_p (actual_range.m_start))
2702     {
2703       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2704       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2705     }
2706   if (should_have_column_data_p (actual_range.m_finish))
2707     {
2708       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2709       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2710     }
2711 }
2712
2713 /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2714    the effective location of any errors.  */
2715
2716 #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2717                              EXPECTED_START_COL, EXPECTED_FINISH_COL)   \
2718   assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2719                         (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2720                         (EXPECTED_FINISH_COL))
2721
2722 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2723    using the string concatenation database for TEST.
2724
2725    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES.  */
2726
2727 static void
2728 assert_num_substring_ranges (const location &loc,
2729                              lexer_test& test,
2730                              location_t strloc,
2731                              enum cpp_ttype type,
2732                              int expected_num_ranges)
2733 {
2734   cpp_reader *pfile = test.m_parser;
2735   string_concat_db *concats = &test.m_concats;
2736
2737   int actual_num_ranges = -1;
2738   const char *err
2739     = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2740                                            &actual_num_ranges);
2741   if (should_have_column_data_p (strloc))
2742     ASSERT_EQ_AT (loc, NULL, err);
2743   else
2744     {
2745       ASSERT_STREQ_AT (loc,
2746                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2747                        err);
2748       return;
2749     }
2750   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2751 }
2752
2753 /* Macro for calling assert_num_substring_ranges, supplying
2754    SELFTEST_LOCATION for the effective location of any errors.  */
2755
2756 #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2757                                     EXPECTED_NUM_RANGES)                \
2758   assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2759                                (TYPE), (EXPECTED_NUM_RANGES))
2760
2761
2762 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2763    returns an error (using the string concatenation database for TEST).  */
2764
2765 static void
2766 assert_has_no_substring_ranges (const location &loc,
2767                                 lexer_test& test,
2768                                 location_t strloc,
2769                                 enum cpp_ttype type,
2770                                 const char *expected_err)
2771 {
2772   cpp_reader *pfile = test.m_parser;
2773   string_concat_db *concats = &test.m_concats;
2774   cpp_substring_ranges ranges;
2775   const char *actual_err
2776     = get_substring_ranges_for_loc (pfile, concats, strloc,
2777                                     type, ranges);
2778   if (should_have_column_data_p (strloc))
2779     ASSERT_STREQ_AT (loc, expected_err, actual_err);
2780   else
2781     ASSERT_STREQ_AT (loc,
2782                      "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2783                      actual_err);
2784 }
2785
2786 #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)    \
2787     assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2788                                     (STRLOC), (TYPE), (ERR))
2789
2790 /* Lex a simple string literal.  Verify the substring location data, before
2791    and after running cpp_interpret_string on it.  */
2792
2793 static void
2794 test_lexer_string_locations_simple (const line_table_case &case_)
2795 {
2796   /* Digits 0-9 (with 0 at column 10), the simple way.
2797      ....................000000000.11111111112.2222222223333333333
2798      ....................123456789.01234567890.1234567890123456789
2799      We add a trailing comment to ensure that we correctly locate
2800      the end of the string literal token.  */
2801   const char *content = "        \"0123456789\" /* not a string */\n";
2802   lexer_test test (case_, content, NULL);
2803
2804   /* Verify that we get the expected token back, with the correct
2805      location information.  */
2806   const cpp_token *tok = test.get_token ();
2807   ASSERT_EQ (tok->type, CPP_STRING);
2808   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2809   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2810
2811   /* At this point in lexing, the quote characters are treated as part of
2812      the string (they are stripped off by cpp_interpret_string).  */
2813
2814   ASSERT_EQ (tok->val.str.len, 12);
2815
2816   /* Verify that cpp_interpret_string works.  */
2817   cpp_string dst_string;
2818   const enum cpp_ttype type = CPP_STRING;
2819   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2820                                       &dst_string, type);
2821   ASSERT_TRUE (result);
2822   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2823   free (const_cast <unsigned char *> (dst_string.text));
2824
2825   /* Verify ranges of individual characters.  This no longer includes the
2826      opening quote, but does include the closing quote.  */
2827   for (int i = 0; i <= 10; i++)
2828     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2829                           10 + i, 10 + i);
2830
2831   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2832 }
2833
2834 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2835    encoding.  */
2836
2837 static void
2838 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2839 {
2840   /* EBCDIC support requires iconv.  */
2841   if (!HAVE_ICONV)
2842     return;
2843
2844   /* Digits 0-9 (with 0 at column 10), the simple way.
2845      ....................000000000.11111111112.2222222223333333333
2846      ....................123456789.01234567890.1234567890123456789
2847      We add a trailing comment to ensure that we correctly locate
2848      the end of the string literal token.  */
2849   const char *content = "        \"0123456789\" /* not a string */\n";
2850   ebcdic_execution_charset use_ebcdic;
2851   lexer_test test (case_, content, &use_ebcdic);
2852
2853   /* Verify that we get the expected token back, with the correct
2854      location information.  */
2855   const cpp_token *tok = test.get_token ();
2856   ASSERT_EQ (tok->type, CPP_STRING);
2857   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2858   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2859
2860   /* At this point in lexing, the quote characters are treated as part of
2861      the string (they are stripped off by cpp_interpret_string).  */
2862
2863   ASSERT_EQ (tok->val.str.len, 12);
2864
2865   /* The remainder of the test requires an iconv implementation that
2866      can convert from UTF-8 to the EBCDIC encoding requested above.  */
2867   if (use_ebcdic.iconv_errors_occurred_p ())
2868     return;
2869
2870   /* Verify that cpp_interpret_string works.  */
2871   cpp_string dst_string;
2872   const enum cpp_ttype type = CPP_STRING;
2873   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2874                                       &dst_string, type);
2875   ASSERT_TRUE (result);
2876   /* We should now have EBCDIC-encoded text, specifically
2877      IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2878      The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
2879   ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2880                 (const char *)dst_string.text);
2881   free (const_cast <unsigned char *> (dst_string.text));
2882
2883   /* Verify that we don't attempt to record substring location information
2884      for such cases.  */
2885   ASSERT_HAS_NO_SUBSTRING_RANGES
2886     (test, tok->src_loc, type,
2887      "execution character set != source character set");
2888 }
2889
2890 /* Lex a string literal containing a hex-escaped character.
2891    Verify the substring location data, before and after running
2892    cpp_interpret_string on it.  */
2893
2894 static void
2895 test_lexer_string_locations_hex (const line_table_case &case_)
2896 {
2897   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2898      and with a space in place of digit 6, to terminate the escaped
2899      hex code.
2900      ....................000000000.111111.11112222.
2901      ....................123456789.012345.67890123.  */
2902   const char *content = "        \"01234\\x35 789\"\n";
2903   lexer_test test (case_, content, NULL);
2904
2905   /* Verify that we get the expected token back, with the correct
2906      location information.  */
2907   const cpp_token *tok = test.get_token ();
2908   ASSERT_EQ (tok->type, CPP_STRING);
2909   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2910   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2911
2912   /* At this point in lexing, the quote characters are treated as part of
2913      the string (they are stripped off by cpp_interpret_string).  */
2914   ASSERT_EQ (tok->val.str.len, 15);
2915
2916   /* Verify that cpp_interpret_string works.  */
2917   cpp_string dst_string;
2918   const enum cpp_ttype type = CPP_STRING;
2919   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2920                                       &dst_string, type);
2921   ASSERT_TRUE (result);
2922   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2923   free (const_cast <unsigned char *> (dst_string.text));
2924
2925   /* Verify ranges of individual characters.  This no longer includes the
2926      opening quote, but does include the closing quote.  */
2927   for (int i = 0; i <= 4; i++)
2928     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2929   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2930   for (int i = 6; i <= 10; i++)
2931     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2932
2933   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2934 }
2935
2936 /* Lex a string literal containing an octal-escaped character.
2937    Verify the substring location data after running cpp_interpret_string
2938    on it.  */
2939
2940 static void
2941 test_lexer_string_locations_oct (const line_table_case &case_)
2942 {
2943   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2944      and with a space in place of digit 6, to terminate the escaped
2945      octal code.
2946      ....................000000000.111111.11112222.2222223333333333444
2947      ....................123456789.012345.67890123.4567890123456789012  */
2948   const char *content = "        \"01234\\065 789\" /* not a string */\n";
2949   lexer_test test (case_, content, NULL);
2950
2951   /* Verify that we get the expected token back, with the correct
2952      location information.  */
2953   const cpp_token *tok = test.get_token ();
2954   ASSERT_EQ (tok->type, CPP_STRING);
2955   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2956
2957   /* Verify that cpp_interpret_string works.  */
2958   cpp_string dst_string;
2959   const enum cpp_ttype type = CPP_STRING;
2960   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2961                                       &dst_string, type);
2962   ASSERT_TRUE (result);
2963   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2964   free (const_cast <unsigned char *> (dst_string.text));
2965
2966   /* Verify ranges of individual characters.  This no longer includes the
2967      opening quote, but does include the closing quote.  */
2968   for (int i = 0; i < 5; i++)
2969     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2970   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2971   for (int i = 6; i <= 10; i++)
2972     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2973
2974   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2975 }
2976
2977 /* Test of string literal containing letter escapes.  */
2978
2979 static void
2980 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2981 {
2982   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2983      .....................000000000.1.11111.1.1.11222.22222223333333
2984      .....................123456789.0.12345.6.7.89012.34567890123456.  */
2985   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2986   lexer_test test (case_, content, NULL);
2987
2988   /* Verify that we get the expected tokens back.  */
2989   const cpp_token *tok = test.get_token ();
2990   ASSERT_EQ (tok->type, CPP_STRING);
2991   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2992
2993   /* Verify ranges of individual characters. */
2994   /* "\t".  */
2995   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2996                         0, 1, 10, 11);
2997   /* "foo". */
2998   for (int i = 1; i <= 3; i++)
2999     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3000                           i, 1, 11 + i, 11 + i);
3001   /* "\\" and "\n".  */
3002   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3003                         4, 1, 15, 16);
3004   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3005                         5, 1, 17, 18);
3006
3007   /* "bar" and closing quote for nul-terminator.  */
3008   for (int i = 6; i <= 9; i++)
3009     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3010                           i, 1, 13 + i, 13 + i);
3011
3012   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
3013 }
3014
3015 /* Another test of a string literal containing a letter escape.
3016    Based on string seen in
3017      printf ("%-%\n");
3018    in gcc.dg/format/c90-printf-1.c.  */
3019
3020 static void
3021 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
3022 {
3023   /* .....................000000000.1111.11.1111.22222222223.
3024      .....................123456789.0123.45.6789.01234567890.  */
3025   const char *content = ("        \"%-%\\n\" /* non-str */\n");
3026   lexer_test test (case_, content, NULL);
3027
3028   /* Verify that we get the expected tokens back.  */
3029   const cpp_token *tok = test.get_token ();
3030   ASSERT_EQ (tok->type, CPP_STRING);
3031   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
3032
3033   /* Verify ranges of individual characters. */
3034   /* "%-%".  */
3035   for (int i = 0; i < 3; i++)
3036     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3037                           i, 1, 10 + i, 10 + i);
3038   /* "\n".  */
3039   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3040                         3, 1, 13, 14);
3041
3042   /* Closing quote for nul-terminator.  */
3043   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3044                         4, 1, 15, 15);
3045
3046   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
3047 }
3048
3049 /* Lex a string literal containing UCN 4 characters.
3050    Verify the substring location data after running cpp_interpret_string
3051    on it.  */
3052
3053 static void
3054 test_lexer_string_locations_ucn4 (const line_table_case &case_)
3055 {
3056   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
3057      as UCN 4.
3058      ....................000000000.111111.111122.222222223.33333333344444
3059      ....................123456789.012345.678901.234567890.12345678901234  */
3060   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
3061   lexer_test test (case_, content, NULL);
3062
3063   /* Verify that we get the expected token back, with the correct
3064      location information.  */
3065   const cpp_token *tok = test.get_token ();
3066   ASSERT_EQ (tok->type, CPP_STRING);
3067   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
3068
3069   /* Verify that cpp_interpret_string works.
3070      The string should be encoded in the execution character
3071      set.  Assuming that is UTF-8, we should have the following:
3072      -----------  ----  -----  -------  ----------------
3073      Byte offset  Byte  Octal  Unicode  Source Column(s)
3074      -----------  ----  -----  -------  ----------------
3075      0            0x30         '0'      10
3076      1            0x31         '1'      11
3077      2            0x32         '2'      12
3078      3            0x33         '3'      13
3079      4            0x34         '4'      14
3080      5            0xE2  \342   U+2174   15-20
3081      6            0x85  \205    (cont)  15-20
3082      7            0xB4  \264    (cont)  15-20
3083      8            0xE2  \342   U+2175   21-26
3084      9            0x85  \205    (cont)  21-26
3085      10           0xB5  \265    (cont)  21-26
3086      11           0x37         '7'      27
3087      12           0x38         '8'      28
3088      13           0x39         '9'      29
3089      14           0x00                  30 (closing quote)
3090      -----------  ----  -----  -------  ---------------.  */
3091
3092   cpp_string dst_string;
3093   const enum cpp_ttype type = CPP_STRING;
3094   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3095                                       &dst_string, type);
3096   ASSERT_TRUE (result);
3097   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3098                 (const char *)dst_string.text);
3099   free (const_cast <unsigned char *> (dst_string.text));
3100
3101   /* Verify ranges of individual characters.  This no longer includes the
3102      opening quote, but does include the closing quote.
3103      '01234'.  */
3104   for (int i = 0; i <= 4; i++)
3105     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3106   /* U+2174.  */
3107   for (int i = 5; i <= 7; i++)
3108     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
3109   /* U+2175.  */
3110   for (int i = 8; i <= 10; i++)
3111     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
3112   /* '789' and nul terminator  */
3113   for (int i = 11; i <= 14; i++)
3114     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
3115
3116   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
3117 }
3118
3119 /* Lex a string literal containing UCN 8 characters.
3120    Verify the substring location data after running cpp_interpret_string
3121    on it.  */
3122
3123 static void
3124 test_lexer_string_locations_ucn8 (const line_table_case &case_)
3125 {
3126   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
3127      ....................000000000.111111.1111222222.2222333333333.344444
3128      ....................123456789.012345.6789012345.6789012345678.901234  */
3129   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
3130   lexer_test test (case_, content, NULL);
3131
3132   /* Verify that we get the expected token back, with the correct
3133      location information.  */
3134   const cpp_token *tok = test.get_token ();
3135   ASSERT_EQ (tok->type, CPP_STRING);
3136   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
3137                            "\"01234\\U00002174\\U00002175789\"");
3138
3139   /* Verify that cpp_interpret_string works.
3140      The UTF-8 encoding of the string is identical to that from
3141      the ucn4 testcase above; the only difference is the column
3142      locations.  */
3143   cpp_string dst_string;
3144   const enum cpp_ttype type = CPP_STRING;
3145   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3146                                       &dst_string, type);
3147   ASSERT_TRUE (result);
3148   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3149                 (const char *)dst_string.text);
3150   free (const_cast <unsigned char *> (dst_string.text));
3151
3152   /* Verify ranges of individual characters.  This no longer includes the
3153      opening quote, but does include the closing quote.
3154      '01234'.  */
3155   for (int i = 0; i <= 4; i++)
3156     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3157   /* U+2174.  */
3158   for (int i = 5; i <= 7; i++)
3159     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
3160   /* U+2175.  */
3161   for (int i = 8; i <= 10; i++)
3162     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
3163   /* '789' at columns 35-37  */
3164   for (int i = 11; i <= 13; i++)
3165     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
3166   /* Closing quote/nul-terminator at column 38.  */
3167   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
3168
3169   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
3170 }
3171
/* Read the four bytes at PTR_BE_VALUE as a big-endian 32-bit value
   (most significant byte first), and return it in host byte order.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *)ptr_be_value;
  uint32_t host_value = 0;
  for (int i = 0; i < 4; i++)
    host_value = (host_value << 8) | bytes[i];
  return host_value;
}
3183
3184 /* Lex a wide string literal and verify that attempts to read substring
3185    location data from it fail gracefully.  */
3186
3187 static void
3188 test_lexer_string_locations_wide_string (const line_table_case &case_)
3189 {
3190   /* Digits 0-9.
3191      ....................000000000.11111111112.22222222233333
3192      ....................123456789.01234567890.12345678901234  */
3193   const char *content = "       L\"0123456789\" /* non-str */\n";
3194   lexer_test test (case_, content, NULL);
3195
3196   /* Verify that we get the expected token back, with the correct
3197      location information.  */
3198   const cpp_token *tok = test.get_token ();
3199   ASSERT_EQ (tok->type, CPP_WSTRING);
3200   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
3201
3202   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
3203   cpp_string dst_string;
3204   const enum cpp_ttype type = CPP_WSTRING;
3205   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3206                                       &dst_string, type);
3207   ASSERT_TRUE (result);
3208   /* The cpp_reader defaults to big-endian with
3209      CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
3210      now be encoded as UTF-32BE.  */
3211   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3212   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3213   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3214   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3215   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3216   free (const_cast <unsigned char *> (dst_string.text));
3217
3218   /* We don't yet support generating substring location information
3219      for L"" strings.  */
3220   ASSERT_HAS_NO_SUBSTRING_RANGES
3221     (test, tok->src_loc, type,
3222      "execution character set != source character set");
3223 }
3224
/* Read the two bytes at PTR_BE_VALUE as a big-endian 16-bit value
   (most significant byte first), and return it in host byte order.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *)ptr_be_value;
  const unsigned int hi = bytes[0];
  const unsigned int lo = bytes[1];
  return (uint16_t) ((hi << 8) | lo);
}
3233
3234 /* Lex a u"" string literal and verify that attempts to read substring
3235    location data from it fail gracefully.  */
3236
3237 static void
3238 test_lexer_string_locations_string16 (const line_table_case &case_)
3239 {
3240   /* Digits 0-9.
3241      ....................000000000.11111111112.22222222233333
3242      ....................123456789.01234567890.12345678901234  */
3243   const char *content = "       u\"0123456789\" /* non-str */\n";
3244   lexer_test test (case_, content, NULL);
3245
3246   /* Verify that we get the expected token back, with the correct
3247      location information.  */
3248   const cpp_token *tok = test.get_token ();
3249   ASSERT_EQ (tok->type, CPP_STRING16);
3250   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
3251
3252   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
3253   cpp_string dst_string;
3254   const enum cpp_ttype type = CPP_STRING16;
3255   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3256                                       &dst_string, type);
3257   ASSERT_TRUE (result);
3258
3259   /* The cpp_reader defaults to big-endian, so dst_string should
3260      now be encoded as UTF-16BE.  */
3261   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
3262   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
3263   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
3264   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
3265   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
3266   free (const_cast <unsigned char *> (dst_string.text));
3267
3268   /* We don't yet support generating substring location information
3269      for L"" strings.  */
3270   ASSERT_HAS_NO_SUBSTRING_RANGES
3271     (test, tok->src_loc, type,
3272      "execution character set != source character set");
3273 }
3274
3275 /* Lex a U"" string literal and verify that attempts to read substring
3276    location data from it fail gracefully.  */
3277
3278 static void
3279 test_lexer_string_locations_string32 (const line_table_case &case_)
3280 {
3281   /* Digits 0-9.
3282      ....................000000000.11111111112.22222222233333
3283      ....................123456789.01234567890.12345678901234  */
3284   const char *content = "       U\"0123456789\" /* non-str */\n";
3285   lexer_test test (case_, content, NULL);
3286
3287   /* Verify that we get the expected token back, with the correct
3288      location information.  */
3289   const cpp_token *tok = test.get_token ();
3290   ASSERT_EQ (tok->type, CPP_STRING32);
3291   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
3292
3293   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
3294   cpp_string dst_string;
3295   const enum cpp_ttype type = CPP_STRING32;
3296   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3297                                       &dst_string, type);
3298   ASSERT_TRUE (result);
3299
3300   /* The cpp_reader defaults to big-endian, so dst_string should
3301      now be encoded as UTF-32BE.  */
3302   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3303   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3304   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3305   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3306   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3307   free (const_cast <unsigned char *> (dst_string.text));
3308
3309   /* We don't yet support generating substring location information
3310      for L"" strings.  */
3311   ASSERT_HAS_NO_SUBSTRING_RANGES
3312     (test, tok->src_loc, type,
3313      "execution character set != source character set");
3314 }
3315
3316 /* Lex a u8-string literal.
3317    Verify the substring location data after running cpp_interpret_string
3318    on it.  */
3319
3320 static void
3321 test_lexer_string_locations_u8 (const line_table_case &case_)
3322 {
3323   /* Digits 0-9.
3324      ....................000000000.11111111112.22222222233333
3325      ....................123456789.01234567890.12345678901234  */
3326   const char *content = "      u8\"0123456789\" /* non-str */\n";
3327   lexer_test test (case_, content, NULL);
3328
3329   /* Verify that we get the expected token back, with the correct
3330      location information.  */
3331   const cpp_token *tok = test.get_token ();
3332   ASSERT_EQ (tok->type, CPP_UTF8STRING);
3333   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
3334
3335   /* Verify that cpp_interpret_string works.  */
3336   cpp_string dst_string;
3337   const enum cpp_ttype type = CPP_STRING;
3338   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3339                                       &dst_string, type);
3340   ASSERT_TRUE (result);
3341   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3342   free (const_cast <unsigned char *> (dst_string.text));
3343
3344   /* Verify ranges of individual characters.  This no longer includes the
3345      opening quote, but does include the closing quote.  */
3346   for (int i = 0; i <= 10; i++)
3347     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3348 }
3349
3350 /* Lex a string literal containing UTF-8 source characters.
3351    Verify the substring location data after running cpp_interpret_string
3352    on it.  */
3353
3354 static void
3355 test_lexer_string_locations_utf8_source (const line_table_case &case_)
3356 {
3357  /* This string literal is written out to the source file as UTF-8,
3358     and is of the form "before mojibake after", where "mojibake"
3359     is written as the following four unicode code points:
3360        U+6587 CJK UNIFIED IDEOGRAPH-6587
3361        U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3362        U+5316 CJK UNIFIED IDEOGRAPH-5316
3363        U+3051 HIRAGANA LETTER KE.
3364      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3365      "before" and "after" are 1 byte per unicode character.
3366
3367      The numbering shown are "columns", which are *byte* numbers within
3368      the line, rather than unicode character numbers.
3369
3370      .................... 000000000.1111111.
3371      .................... 123456789.0123456.  */
3372   const char *content = ("        \"before "
3373                          /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3374                               UTF-8: 0xE6 0x96 0x87
3375                               C octal escaped UTF-8: \346\226\207
3376                             "column" numbers: 17-19.  */
3377                          "\346\226\207"
3378
3379                          /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3380                               UTF-8: 0xE5 0xAD 0x97
3381                               C octal escaped UTF-8: \345\255\227
3382                             "column" numbers: 20-22.  */
3383                          "\345\255\227"
3384
3385                          /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3386                               UTF-8: 0xE5 0x8C 0x96
3387                               C octal escaped UTF-8: \345\214\226
3388                             "column" numbers: 23-25.  */
3389                          "\345\214\226"
3390
3391                          /* U+3051 HIRAGANA LETTER KE
3392                               UTF-8: 0xE3 0x81 0x91
3393                               C octal escaped UTF-8: \343\201\221
3394                             "column" numbers: 26-28.  */
3395                          "\343\201\221"
3396
3397                          /* column numbers 29 onwards
3398                           2333333.33334444444444
3399                           9012345.67890123456789. */
3400                          " after\" /* non-str */\n");
3401   lexer_test test (case_, content, NULL);
3402
3403   /* Verify that we get the expected token back, with the correct
3404      location information.  */
3405   const cpp_token *tok = test.get_token ();
3406   ASSERT_EQ (tok->type, CPP_STRING);
3407   ASSERT_TOKEN_AS_TEXT_EQ
3408     (test.m_parser, tok,
3409      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3410
3411   /* Verify that cpp_interpret_string works.  */
3412   cpp_string dst_string;
3413   const enum cpp_ttype type = CPP_STRING;
3414   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3415                                       &dst_string, type);
3416   ASSERT_TRUE (result);
3417   ASSERT_STREQ
3418     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3419      (const char *)dst_string.text);
3420   free (const_cast <unsigned char *> (dst_string.text));
3421
3422   /* Verify ranges of individual characters.  This no longer includes the
3423      opening quote, but does include the closing quote.
3424      Assuming that both source and execution encodings are UTF-8, we have
3425      a run of 25 octets in each, plus the NUL terminator.  */
3426   for (int i = 0; i < 25; i++)
3427     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3428   /* NUL-terminator should use the closing quote at column 35.  */
3429   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3430
3431   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3432 }
3433
3434 /* Test of string literal concatenation.  */
3435
3436 static void
3437 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3438 {
3439   /* Digits 0-9.
3440      .....................000000000.111111.11112222222222
3441      .....................123456789.012345.67890123456789.  */
3442   const char *content = ("        \"01234\" /* non-str */\n"
3443                          "        \"56789\" /* non-str */\n");
3444   lexer_test test (case_, content, NULL);
3445
3446   location_t input_locs[2];
3447
3448   /* Verify that we get the expected tokens back.  */
3449   auto_vec <cpp_string> input_strings;
3450   const cpp_token *tok_a = test.get_token ();
3451   ASSERT_EQ (tok_a->type, CPP_STRING);
3452   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3453   input_strings.safe_push (tok_a->val.str);
3454   input_locs[0] = tok_a->src_loc;
3455
3456   const cpp_token *tok_b = test.get_token ();
3457   ASSERT_EQ (tok_b->type, CPP_STRING);
3458   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3459   input_strings.safe_push (tok_b->val.str);
3460   input_locs[1] = tok_b->src_loc;
3461
3462   /* Verify that cpp_interpret_string works.  */
3463   cpp_string dst_string;
3464   const enum cpp_ttype type = CPP_STRING;
3465   bool result = cpp_interpret_string (test.m_parser,
3466                                       input_strings.address (), 2,
3467                                       &dst_string, type);
3468   ASSERT_TRUE (result);
3469   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3470   free (const_cast <unsigned char *> (dst_string.text));
3471
3472   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
3473   test.m_concats.record_string_concatenation (2, input_locs);
3474
3475   location_t initial_loc = input_locs[0];
3476
3477   /* "01234" on line 1.  */
3478   for (int i = 0; i <= 4; i++)
3479     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3480   /* "56789" in line 2, plus its closing quote for the nul terminator.  */
3481   for (int i = 5; i <= 10; i++)
3482     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3483
3484   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3485 }
3486
3487 /* Another test of string literal concatenation.  */
3488
3489 static void
3490 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3491 {
3492   /* Digits 0-9.
3493      .....................000000000.111.11111112222222
3494      .....................123456789.012.34567890123456.  */
3495   const char *content = ("        \"01\" /* non-str */\n"
3496                          "        \"23\" /* non-str */\n"
3497                          "        \"45\" /* non-str */\n"
3498                          "        \"67\" /* non-str */\n"
3499                          "        \"89\" /* non-str */\n");
3500   lexer_test test (case_, content, NULL);
3501
3502   auto_vec <cpp_string> input_strings;
3503   location_t input_locs[5];
3504
3505   /* Verify that we get the expected tokens back.  */
3506   for (int i = 0; i < 5; i++)
3507     {
3508       const cpp_token *tok = test.get_token ();
3509       ASSERT_EQ (tok->type, CPP_STRING);
3510       input_strings.safe_push (tok->val.str);
3511       input_locs[i] = tok->src_loc;
3512     }
3513
3514   /* Verify that cpp_interpret_string works.  */
3515   cpp_string dst_string;
3516   const enum cpp_ttype type = CPP_STRING;
3517   bool result = cpp_interpret_string (test.m_parser,
3518                                       input_strings.address (), 5,
3519                                       &dst_string, type);
3520   ASSERT_TRUE (result);
3521   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3522   free (const_cast <unsigned char *> (dst_string.text));
3523
3524   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
3525   test.m_concats.record_string_concatenation (5, input_locs);
3526
3527   location_t initial_loc = input_locs[0];
3528
3529   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3530      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3531      and expect get_source_range_for_substring to fail.
3532      However, for a string concatenation test, we can have a case
3533      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3534      but subsequent strings can be after it.
3535      Attempting to detect this within assert_char_at_range
3536      would overcomplicate the logic for the common test cases, so
3537      we detect it here.  */
3538   if (should_have_column_data_p (input_locs[0])
3539       && !should_have_column_data_p (input_locs[4]))
3540     {
3541       /* Verify that get_source_range_for_substring gracefully rejects
3542          this case.  */
3543       source_range actual_range;
3544       const char *err
3545         = get_source_range_for_char (test.m_parser, &test.m_concats,
3546                                      initial_loc, type, 0, &actual_range);
3547       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3548       return;
3549     }
3550
3551   for (int i = 0; i < 5; i++)
3552     for (int j = 0; j < 2; j++)
3553       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3554                             i + 1, 10 + j, 10 + j);
3555
3556   /* NUL-terminator should use the final closing quote at line 5 column 12.  */
3557   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3558
3559   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3560 }
3561
3562 /* Another test of string literal concatenation, this time combined with
3563    various kinds of escaped characters.  */
3564
3565 static void
3566 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3567 {
3568   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3569      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
3570   const char *content
3571     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3572        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3573     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
3574   lexer_test test (case_, content, NULL);
3575
3576   auto_vec <cpp_string> input_strings;
3577   location_t input_locs[4];
3578
3579   /* Verify that we get the expected tokens back.  */
3580   for (int i = 0; i < 4; i++)
3581     {
3582       const cpp_token *tok = test.get_token ();
3583       ASSERT_EQ (tok->type, CPP_STRING);
3584       input_strings.safe_push (tok->val.str);
3585       input_locs[i] = tok->src_loc;
3586     }
3587
3588   /* Verify that cpp_interpret_string works.  */
3589   cpp_string dst_string;
3590   const enum cpp_ttype type = CPP_STRING;
3591   bool result = cpp_interpret_string (test.m_parser,
3592                                       input_strings.address (), 4,
3593                                       &dst_string, type);
3594   ASSERT_TRUE (result);
3595   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3596   free (const_cast <unsigned char *> (dst_string.text));
3597
3598   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
3599   test.m_concats.record_string_concatenation (4, input_locs);
3600
3601   location_t initial_loc = input_locs[0];
3602
3603   for (int i = 0; i <= 4; i++)
3604     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3605   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3606   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3607   for (int i = 7; i <= 9; i++)
3608     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3609
3610   /* NUL-terminator should use the location of the final closing quote.  */
3611   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3612
3613   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3614 }
3615
3616 /* Test of string literal in a macro.  */
3617
3618 static void
3619 test_lexer_string_locations_macro (const line_table_case &case_)
3620 {
3621   /* Digits 0-9.
3622      .....................0000000001111111111.22222222223.
3623      .....................1234567890123456789.01234567890.  */
3624   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
3625                          "  MACRO");
3626   lexer_test test (case_, content, NULL);
3627
3628   /* Verify that we get the expected tokens back.  */
3629   const cpp_token *tok = test.get_token ();
3630   ASSERT_EQ (tok->type, CPP_PADDING);
3631
3632   tok = test.get_token ();
3633   ASSERT_EQ (tok->type, CPP_STRING);
3634   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3635
3636   /* Verify ranges of individual characters.  We ought to
3637      see columns within the macro definition.  */
3638   for (int i = 0; i <= 10; i++)
3639     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3640                           i, 1, 20 + i, 20 + i);
3641
3642   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3643
3644   tok = test.get_token ();
3645   ASSERT_EQ (tok->type, CPP_PADDING);
3646 }
3647
3648 /* Test of stringification of a macro argument.  */
3649
3650 static void
3651 test_lexer_string_locations_stringified_macro_argument
3652   (const line_table_case &case_)
3653 {
3654   /* .....................000000000111111111122222222223.
3655      .....................123456789012345678901234567890.  */
3656   const char *content = ("#define MACRO(X) #X /* non-str */\n"
3657                          "MACRO(foo)\n");
3658   lexer_test test (case_, content, NULL);
3659
3660   /* Verify that we get the expected token back.  */
3661   const cpp_token *tok = test.get_token ();
3662   ASSERT_EQ (tok->type, CPP_PADDING);
3663
3664   tok = test.get_token ();
3665   ASSERT_EQ (tok->type, CPP_STRING);
3666   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3667
3668   /* We don't support getting the location of a stringified macro
3669      argument.  Verify that it fails gracefully.  */
3670   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3671                                   "cpp_interpret_string_1 failed");
3672
3673   tok = test.get_token ();
3674   ASSERT_EQ (tok->type, CPP_PADDING);
3675
3676   tok = test.get_token ();
3677   ASSERT_EQ (tok->type, CPP_PADDING);
3678 }
3679
3680 /* Ensure that we are fail gracefully if something attempts to pass
3681    in a location that isn't a string literal token.  Seen on this code:
3682
3683      const char a[] = " %d ";
3684      __builtin_printf (a, 0.5);
3685                        ^
3686
3687    when c-format.cc erroneously used the indicated one-character
3688    location as the format string location, leading to a read past the
3689    end of a string buffer in cpp_interpret_string_1.  */
3690
3691 static void
3692 test_lexer_string_locations_non_string (const line_table_case &case_)
3693 {
3694   /* .....................000000000111111111122222222223.
3695      .....................123456789012345678901234567890.  */
3696   const char *content = ("         a\n");
3697   lexer_test test (case_, content, NULL);
3698
3699   /* Verify that we get the expected token back.  */
3700   const cpp_token *tok = test.get_token ();
3701   ASSERT_EQ (tok->type, CPP_NAME);
3702   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3703
3704   /* At this point, libcpp is attempting to interpret the name as a
3705      string literal, despite it not starting with a quote.  We don't detect
3706      that, but we should at least fail gracefully.  */
3707   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3708                                   "cpp_interpret_string_1 failed");
3709 }
3710
3711 /* Ensure that we can read substring information for a token which
3712    starts in one linemap and ends in another .  Adapted from
3713    gcc.dg/cpp/pr69985.c.  */
3714
3715 static void
3716 test_lexer_string_locations_long_line (const line_table_case &case_)
3717 {
3718   /* .....................000000.000111111111
3719      .....................123456.789012346789.  */
3720   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
3721                          "     \"0123456789012345678901234567890123456789"
3722                          "0123456789012345678901234567890123456789"
3723                          "0123456789012345678901234567890123456789"
3724                          "0123456789\"\n");
3725
3726   lexer_test test (case_, content, NULL);
3727
3728   /* Verify that we get the expected token back.  */
3729   const cpp_token *tok = test.get_token ();
3730   ASSERT_EQ (tok->type, CPP_STRING);
3731
3732   if (!should_have_column_data_p (line_table->highest_location))
3733     return;
3734
3735   /* Verify ranges of individual characters.  */
3736   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3737   for (int i = 0; i < 131; i++)
3738     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3739                           i, 2, 7 + i, 7 + i);
3740 }
3741
3742 /* Test of locations within a raw string that doesn't contain a newline.  */
3743
3744 static void
3745 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3746 {
3747   /* .....................00.0000000111111111122.
3748      .....................12.3456789012345678901.  */
3749   const char *content = ("R\"foo(0123456789)foo\"\n");
3750   lexer_test test (case_, content, NULL);
3751
3752   /* Verify that we get the expected token back.  */
3753   const cpp_token *tok = test.get_token ();
3754   ASSERT_EQ (tok->type, CPP_STRING);
3755
3756   /* Verify that cpp_interpret_string works.  */
3757   cpp_string dst_string;
3758   const enum cpp_ttype type = CPP_STRING;
3759   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3760                                       &dst_string, type);
3761   ASSERT_TRUE (result);
3762   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3763   free (const_cast <unsigned char *> (dst_string.text));
3764
3765   if (!should_have_column_data_p (line_table->highest_location))
3766     return;
3767
3768   /* 0-9, plus the nil terminator.  */
3769   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3770   for (int i = 0; i < 11; i++)
3771     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3772                           i, 1, 7 + i, 7 + i);
3773 }
3774
3775 /* Test of locations within a raw string that contains a newline.  */
3776
3777 static void
3778 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3779 {
3780   /* .....................00.0000.
3781      .....................12.3456.  */
3782   const char *content = ("R\"foo(\n"
3783   /* .....................00000.
3784      .....................12345.  */
3785                          "hello\n"
3786                          "world\n"
3787   /* .....................00000.
3788      .....................12345.  */
3789                          ")foo\"\n");
3790   lexer_test test (case_, content, NULL);
3791
3792   /* Verify that we get the expected token back.  */
3793   const cpp_token *tok = test.get_token ();
3794   ASSERT_EQ (tok->type, CPP_STRING);
3795
3796   /* Verify that cpp_interpret_string works.  */
3797   cpp_string dst_string;
3798   const enum cpp_ttype type = CPP_STRING;
3799   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3800                                       &dst_string, type);
3801   ASSERT_TRUE (result);
3802   ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3803   free (const_cast <unsigned char *> (dst_string.text));
3804
3805   if (!should_have_column_data_p (line_table->highest_location))
3806     return;
3807
3808   /* Currently we don't support locations within raw strings that
3809      contain newlines.  */
3810   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3811                                   "range endpoints are on different lines");
3812 }
3813
3814 /* Test of parsing an unterminated raw string.  */
3815
3816 static void
3817 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3818 {
3819   const char *content = "R\"ouch()ouCh\" /* etc */";
3820
3821   lexer_diagnostic_sink diagnostics;
3822   lexer_test test (case_, content, &diagnostics);
3823   test.m_implicitly_expect_EOF = false;
3824
3825   /* Attempt to parse the raw string.  */
3826   const cpp_token *tok = test.get_token ();
3827   ASSERT_EQ (tok->type, CPP_EOF);
3828
3829   ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3830   /* We expect the message "unterminated raw string"
3831      in the "cpplib" translation domain.
3832      It's not clear that dgettext is available on all supported hosts,
3833      so this assertion is commented-out for now.
3834        ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3835                      diagnostics.m_diagnostics[0]);
3836   */
3837 }
3838
3839 /* Test of lexing char constants.  */
3840
3841 static void
3842 test_lexer_char_constants (const line_table_case &case_)
3843 {
3844   /* Various char constants.
3845      .....................0000000001111111111.22222222223.
3846      .....................1234567890123456789.01234567890.  */
3847   const char *content = ("         'a'\n"
3848                          "        u'a'\n"
3849                          "        U'a'\n"
3850                          "        L'a'\n"
3851                          "         'abc'\n");
3852   lexer_test test (case_, content, NULL);
3853
3854   /* Verify that we get the expected tokens back.  */
3855   /* 'a'.  */
3856   const cpp_token *tok = test.get_token ();
3857   ASSERT_EQ (tok->type, CPP_CHAR);
3858   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3859
3860   unsigned int chars_seen;
3861   int unsignedp;
3862   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3863                                           &chars_seen, &unsignedp);
3864   ASSERT_EQ (cc, 'a');
3865   ASSERT_EQ (chars_seen, 1);
3866
3867   /* u'a'.  */
3868   tok = test.get_token ();
3869   ASSERT_EQ (tok->type, CPP_CHAR16);
3870   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3871
3872   /* U'a'.  */
3873   tok = test.get_token ();
3874   ASSERT_EQ (tok->type, CPP_CHAR32);
3875   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3876
3877   /* L'a'.  */
3878   tok = test.get_token ();
3879   ASSERT_EQ (tok->type, CPP_WCHAR);
3880   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3881
3882   /* 'abc' (c-char-sequence).  */
3883   tok = test.get_token ();
3884   ASSERT_EQ (tok->type, CPP_CHAR);
3885   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3886 }
3887 /* A table of interesting location_t values, giving one axis of our test
3888    matrix.  */
3889
3890 static const location_t boundary_locations[] = {
3891   /* Zero means "don't override the default values for a new line_table".  */
3892   0,
3893
3894   /* An arbitrary non-zero value that isn't close to one of
3895      the boundary values below.  */
3896   0x10000,
3897
3898   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
3899   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3900   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3901   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3902   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3903   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3904
3905   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
3906   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3907   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3908   LINE_MAP_MAX_LOCATION_WITH_COLS,
3909   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3910   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3911 };
3912
3913 /* Run TESTCASE multiple times, once for each case in our test matrix.  */
3914
3915 void
3916 for_each_line_table_case (void (*testcase) (const line_table_case &))
3917 {
3918   /* As noted above in the description of struct line_table_case,
3919      we want to explore a test matrix of interesting line_table
3920      situations, running various selftests for each case within the
3921      matrix.  */
3922
3923   /* Run all tests with:
3924      (a) line_table->default_range_bits == 0, and
3925      (b) line_table->default_range_bits == 5.  */
3926   int num_cases_tested = 0;
3927   for (int default_range_bits = 0; default_range_bits <= 5;
3928        default_range_bits += 5)
3929     {
3930       /* ...and use each of the "interesting" location values as
3931          the starting location within line_table.  */
3932       const int num_boundary_locations = ARRAY_SIZE (boundary_locations);
3933       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3934         {
3935           line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3936
3937           testcase (c);
3938
3939           num_cases_tested++;
3940         }
3941     }
3942
3943   /* Verify that we fully covered the test matrix.  */
3944   ASSERT_EQ (num_cases_tested, 2 * 12);
3945 }
3946
3947 /* Verify that when presented with a consecutive pair of locations with
3948    a very large line offset, we don't attempt to consolidate them into
3949    a single ordinary linemap where the line offsets within the line map
3950    would lead to overflow (PR lto/88147).  */
3951
3952 static void
3953 test_line_offset_overflow ()
3954 {
3955   line_table_test ltt (line_table_case (5, 0));
3956
3957   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3958   linemap_line_start (line_table, 1, 100);
3959   location_t loc_a = linemap_line_start (line_table, 2578, 255);
3960   assert_loceq ("foo.c", 2578, 0, loc_a);
3961
3962   const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3963   ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3964   ASSERT_EQ (ordmap_a->m_range_bits, 5);
3965
3966   location_t loc_b = linemap_line_start (line_table, 404198, 512);
3967   assert_loceq ("foo.c", 404198, 0, loc_b);
3968
3969   /* We should have started a new linemap, rather than attempting to store
3970      a very large line offset.  */
3971   const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3972   ASSERT_NE (ordmap_a, ordmap_b);
3973 }
3974
3975 void test_cpp_utf8 ()
3976 {
3977   const int def_tabstop = 8;
3978   cpp_char_column_policy policy (def_tabstop, cpp_wcwidth);
3979
3980   /* Verify that wcwidth of invalid UTF-8 or control bytes is 1.  */
3981   {
3982     int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, policy);
3983     ASSERT_EQ (8, w_bad);
3984     int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, policy);
3985     ASSERT_EQ (5, w_ctrl);
3986   }
3987
3988   /* Verify that wcwidth of valid UTF-8 is as expected.  */
3989   {
3990     const int w_pi = cpp_display_width ("\xcf\x80", 2, policy);
3991     ASSERT_EQ (1, w_pi);
3992     const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, policy);
3993     ASSERT_EQ (2, w_emoji);
3994     const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
3995                                                         policy);
3996     ASSERT_EQ (1, w_umlaut_precomposed);
3997     const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
3998                                                       policy);
3999     ASSERT_EQ (1, w_umlaut_combining);
4000     const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, policy);
4001     ASSERT_EQ (2, w_han);
4002     const int w_ascii = cpp_display_width ("GCC", 3, policy);
4003     ASSERT_EQ (3, w_ascii);
4004     const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
4005                                            "\x9f! \xe4\xb8\xba y\xcc\x88",
4006                                            24, policy);
4007     ASSERT_EQ (18, w_mixed);
4008   }
4009
4010   /* Verify that display width properly expands tabs.  */
4011   {
4012     const char *tstr = "\tabc\td";
4013     ASSERT_EQ (6, cpp_display_width (tstr, 6,
4014                                      cpp_char_column_policy (1, cpp_wcwidth)));
4015     ASSERT_EQ (10, cpp_display_width (tstr, 6,
4016                                       cpp_char_column_policy (3, cpp_wcwidth)));
4017     ASSERT_EQ (17, cpp_display_width (tstr, 6,
4018                                       cpp_char_column_policy (8, cpp_wcwidth)));
4019     ASSERT_EQ (1,
4020                cpp_display_column_to_byte_column
4021                  (tstr, 6, 7, cpp_char_column_policy (8, cpp_wcwidth)));
4022   }
4023
4024   /* Verify that cpp_byte_column_to_display_column can go past the end,
4025      and similar edge cases.  */
4026   {
4027     const char *str
4028       /* Display columns.
4029          111111112345  */
4030       = "\xcf\x80 abc";
4031       /* 111122223456
4032          Byte columns.  */
4033
4034     ASSERT_EQ (5, cpp_display_width (str, 6, policy));
4035     ASSERT_EQ (105,
4036                cpp_byte_column_to_display_column (str, 6, 106, policy));
4037     ASSERT_EQ (10000,
4038                cpp_byte_column_to_display_column (NULL, 0, 10000, policy));
4039     ASSERT_EQ (0,
4040                cpp_byte_column_to_display_column (NULL, 10000, 0, policy));
4041   }
4042
4043   /* Verify that cpp_display_column_to_byte_column can go past the end,
4044      and similar edge cases, and check invertibility.  */
4045   {
4046     const char *str
4047       /* Display columns.
4048          000000000000000000000000000000000000011
4049          111111112222222234444444455555555678901  */
4050       = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
4051       /* 000000000000000000000000000000000111111
4052          111122223333444456666777788889999012345
4053          Byte columns.  */
4054     ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, policy));
4055     ASSERT_EQ (15,
4056                cpp_display_column_to_byte_column (str, 15, 11, policy));
4057     ASSERT_EQ (115,
4058                cpp_display_column_to_byte_column (str, 15, 111, policy));
4059     ASSERT_EQ (10000,
4060                cpp_display_column_to_byte_column (NULL, 0, 10000, policy));
4061     ASSERT_EQ (0,
4062                cpp_display_column_to_byte_column (NULL, 10000, 0, policy));
4063
4064     /* Verify that we do not interrupt a UTF-8 sequence.  */
4065     ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, policy));
4066
4067     for (int byte_col = 1; byte_col <= 15; ++byte_col)
4068       {
4069         const int disp_col
4070           = cpp_byte_column_to_display_column (str, 15, byte_col, policy);
4071         const int byte_col2
4072           = cpp_display_column_to_byte_column (str, 15, disp_col, policy);
4073
4074         /* If we ask for the display column in the middle of a UTF-8
4075            sequence, it will return the length of the partial sequence,
4076            matching the behavior of GCC before display column support.
4077            Otherwise check the round trip was successful.  */
4078         if (byte_col < 4)
4079           ASSERT_EQ (byte_col, disp_col);
4080         else if (byte_col >= 6 && byte_col < 9)
4081           ASSERT_EQ (3 + (byte_col - 5), disp_col);
4082         else
4083           ASSERT_EQ (byte_col2, byte_col);
4084       }
4085   }
4086 }
4087
4088 static bool
4089 check_cpp_valid_utf8_p (const char *str)
4090 {
4091   return cpp_valid_utf8_p (str, strlen (str));
4092 }
4093
4094 /* Check that cpp_valid_utf8_p works as expected.  */
4095
4096 static void
4097 test_cpp_valid_utf8_p ()
4098 {
4099   ASSERT_TRUE (check_cpp_valid_utf8_p ("hello world"));
4100
4101   /* 2-byte char (pi).  */
4102   ASSERT_TRUE (check_cpp_valid_utf8_p("\xcf\x80"));
4103
4104   /* 3-byte chars (the Japanese word "mojibake").  */
4105   ASSERT_TRUE (check_cpp_valid_utf8_p
4106                (
4107                 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
4108                    UTF-8: 0xE6 0x96 0x87
4109                    C octal escaped UTF-8: \346\226\207.  */
4110                 "\346\226\207"
4111                 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
4112                    UTF-8: 0xE5 0xAD 0x97
4113                    C octal escaped UTF-8: \345\255\227.  */
4114                 "\345\255\227"
4115                 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
4116                    UTF-8: 0xE5 0x8C 0x96
4117                    C octal escaped UTF-8: \345\214\226.  */
4118                 "\345\214\226"
4119                 /* U+3051 HIRAGANA LETTER KE
4120                    UTF-8: 0xE3 0x81 0x91
4121                    C octal escaped UTF-8: \343\201\221.  */
4122                 "\343\201\221"));
4123
4124   /* 4-byte char: an emoji.  */
4125   ASSERT_TRUE (check_cpp_valid_utf8_p ("\xf0\x9f\x98\x82"));
4126
4127   /* Control codes, including the NUL byte.  */
4128   ASSERT_TRUE (cpp_valid_utf8_p ("\r\n\v\0\1", 5));
4129
4130   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xf0!\x9f!\x98!\x82!"));
4131
4132   /* Unexpected continuation bytes.  */
4133   for (unsigned char continuation_byte = 0x80;
4134        continuation_byte <= 0xbf;
4135        continuation_byte++)
4136     ASSERT_FALSE (cpp_valid_utf8_p ((const char *)&continuation_byte, 1));
4137
4138   /* "Lonely start characters" for 2-byte sequences.  */
4139   {
4140     unsigned char buf[2];
4141     buf[1] = ' ';
4142     for (buf[0] = 0xc0;
4143          buf[0] <= 0xdf;
4144          buf[0]++)
4145       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4146   }
4147
4148   /* "Lonely start characters" for 3-byte sequences.  */
4149   {
4150     unsigned char buf[2];
4151     buf[1] = ' ';
4152     for (buf[0] = 0xe0;
4153          buf[0] <= 0xef;
4154          buf[0]++)
4155       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4156   }
4157
4158   /* "Lonely start characters" for 4-byte sequences.  */
4159   {
4160     unsigned char buf[2];
4161     buf[1] = ' ';
4162     for (buf[0] = 0xf0;
4163          buf[0] <= 0xf4;
4164          buf[0]++)
4165       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4166   }
4167
4168   /* Invalid start characters (formerly valid for 5-byte and 6-byte
4169      sequences).  */
4170   {
4171     unsigned char buf[2];
4172     buf[1] = ' ';
4173     for (buf[0] = 0xf5;
4174          buf[0] <= 0xfd;
4175          buf[0]++)
4176       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4177   }
4178
4179   /* Impossible bytes.  */
4180   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc0"));
4181   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc1"));
4182   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xfe"));
4183   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xff"));
4184 }
4185
4186 /* Run all of the selftests within this file.  */
4187
4188 void
4189 input_cc_tests ()
4190 {
4191   test_linenum_comparisons ();
4192   test_should_have_column_data_p ();
4193   test_unknown_location ();
4194   test_builtins ();
4195   for_each_line_table_case (test_make_location_nonpure_range_endpoints);
4196
4197   for_each_line_table_case (test_accessing_ordinary_linemaps);
4198   for_each_line_table_case (test_lexer);
4199   for_each_line_table_case (test_lexer_string_locations_simple);
4200   for_each_line_table_case (test_lexer_string_locations_ebcdic);
4201   for_each_line_table_case (test_lexer_string_locations_hex);
4202   for_each_line_table_case (test_lexer_string_locations_oct);
4203   for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
4204   for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
4205   for_each_line_table_case (test_lexer_string_locations_ucn4);
4206   for_each_line_table_case (test_lexer_string_locations_ucn8);
4207   for_each_line_table_case (test_lexer_string_locations_wide_string);
4208   for_each_line_table_case (test_lexer_string_locations_string16);
4209   for_each_line_table_case (test_lexer_string_locations_string32);
4210   for_each_line_table_case (test_lexer_string_locations_u8);
4211   for_each_line_table_case (test_lexer_string_locations_utf8_source);
4212   for_each_line_table_case (test_lexer_string_locations_concatenation_1);
4213   for_each_line_table_case (test_lexer_string_locations_concatenation_2);
4214   for_each_line_table_case (test_lexer_string_locations_concatenation_3);
4215   for_each_line_table_case (test_lexer_string_locations_macro);
4216   for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
4217   for_each_line_table_case (test_lexer_string_locations_non_string);
4218   for_each_line_table_case (test_lexer_string_locations_long_line);
4219   for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
4220   for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
4221   for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
4222   for_each_line_table_case (test_lexer_char_constants);
4223
4224   test_reading_source_line ();
4225
4226   test_line_offset_overflow ();
4227
4228   test_cpp_utf8 ();
4229   test_cpp_valid_utf8_p ();
4230 }
4231
4232 } // namespace selftest
4233
4234 #endif /* CHECKING_P */