gcc/input.c

   1 /* Data and functions related to line maps and input files.
   2    Copyright (C) 2004-2016 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "intl.h"
  24 #include "diagnostic-core.h"
  25 #include "selftest.h"
  26 #include "cpplib.h"
  27
  28 #ifndef HAVE_ICONV
  29 #define HAVE_ICONV 0
  30 #endif
  31
  32 /* This is a cache used by get_next_line to store the content of a
  33    file to be searched for file lines.  */
  34 struct fcache
  35 {
  36   /* These are information used to store a line boundary.  */
  37   struct line_info
  38   {
  39     /* The line number.  It starts from 1.  */
  40     size_t line_num;
  41
  42     /* The position (byte count) of the beginning of the line,
  43        relative to the file data pointer.  This starts at zero.  */
  44     size_t start_pos;
  45
  46     /* The position (byte count) of the last byte of the line.  This
  47        normally points to the '\n' character, or to one byte after the
  48        last byte of the file, if the file doesn't contain a '\n'
  49        character.  */
  50     size_t end_pos;
  51
  52     line_info (size_t l, size_t s, size_t e)
  53       : line_num (l), start_pos (s), end_pos (e)
  54     {}
  55
  56     line_info ()
  57       :line_num (0), start_pos (0), end_pos (0)
  58     {}
  59   };
  60
  61   /* The number of time this file has been accessed.  This is used
  62      to designate which file cache to evict from the cache
  63      array.  */
  64   unsigned use_count;
  65
  66   const char *file_path;
  67
  68   FILE *fp;
  69
  70   /* This points to the content of the file that we've read so
  71      far.  */
  72   char *data;
  73
  74   /*  The size of the DATA array above.*/
  75   size_t size;
  76
  77   /* The number of bytes read from the underlying file so far.  This
  78      must be less (or equal) than SIZE above.  */
  79   size_t nb_read;
  80
  81   /* The index of the beginning of the current line.  */
  82   size_t line_start_idx;
  83
  84   /* The number of the previous line read.  This starts at 1.  Zero
  85      means we've read no line so far.  */
  86   size_t line_num;
  87
  88   /* This is the total number of lines of the current file.  At the
  89      moment, we try to get this information from the line map
  90      subsystem.  Note that this is just a hint.  When using the C++
  91      front-end, this hint is correct because the input file is then
  92      completely tokenized before parsing starts; so the line map knows
  93      the number of lines before compilation really starts.  For e.g,
  94      the C front-end, it can happen that we start emitting diagnostics
  95      before the line map has seen the end of the file.  */
  96   size_t total_lines;
  97
  98   /* This is a record of the beginning and end of the lines we've seen
  99      while reading the file.  This is useful to avoid walking the data
 100      from the beginning when we are asked to read a line that is
 101      before LINE_START_IDX above.  Note that the maximum size of this
 102      record is fcache_line_record_size, so that the memory consumption
 103      doesn't explode.  We thus scale total_lines down to
 104      fcache_line_record_size.  */
 105   vec<line_info, va_heap> line_record;
 106
 107   fcache ();
 108   ~fcache ();
 109 };
 110
/* Current position in real source file.  */

location_t input_location = UNKNOWN_LOCATION;

/* The set of line maps that the functions of this file use to resolve
   and expand source locations.  */
struct line_maps *line_table;

/* The array of file caches.  It is allocated on first use by
   diagnostic_file_cache_init and released by
   diagnostic_file_cache_fini.  */
static fcache *fcache_tab;
/* The number of entries of FCACHE_TAB.  */
static const size_t fcache_tab_size = 16;
/* The size, in bytes, given to the data buffer of a file cache the
   first time it is grown; see maybe_grow.  */
static const size_t fcache_buffer_size = 4 * 1024;
/* The maximum number of entries kept in fcache::line_record.  */
static const size_t fcache_line_record_size = 100;
 121
 122 /* Expand the source location LOC into a human readable location.  If
 123    LOC resolves to a builtin location, the file name of the readable
 124    location is set to the string "<built-in>". If EXPANSION_POINT_P is
 125    TRUE and LOC is virtual, then it is resolved to the expansion
 126    point of the involved macro.  Otherwise, it is resolved to the
 127    spelling location of the token.
 128
 129    When resolving to the spelling location of the token, if the
 130    resulting location is for a built-in location (that is, it has no
 131    associated line/column) in the context of a macro expansion, the
 132    returned location is the first one (while unwinding the macro
 133    location towards its expansion point) that is in real source
 134    code.  */
 135
 136 static expanded_location
 137 expand_location_1 (source_location loc,
 138                    bool expansion_point_p)
 139 {
 140   expanded_location xloc;
 141   const line_map_ordinary *map;
 142   enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
 143   tree block = NULL;
 144
 145   if (IS_ADHOC_LOC (loc))
 146     {
 147       block = LOCATION_BLOCK (loc);
 148       loc = LOCATION_LOCUS (loc);
 149     }
 150
 151   memset (&xloc, 0, sizeof (xloc));
 152
 153   if (loc >= RESERVED_LOCATION_COUNT)
 154     {
 155       if (!expansion_point_p)
 156         {
 157           /* We want to resolve LOC to its spelling location.
 158
 159              But if that spelling location is a reserved location that
 160              appears in the context of a macro expansion (like for a
 161              location for a built-in token), let's consider the first
 162              location (toward the expansion point) that is not reserved;
 163              that is, the first location that is in real source code.  */
 164           loc = linemap_unwind_to_first_non_reserved_loc (line_table,
 165                                                           loc, NULL);
 166           lrk = LRK_SPELLING_LOCATION;
 167         }
 168       loc = linemap_resolve_location (line_table, loc,
 169                                       lrk, &map);
 170       xloc = linemap_expand_location (line_table, map, loc);
 171     }
 172
 173   xloc.data = block;
 174   if (loc <= BUILTINS_LOCATION)
 175     xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
 176
 177   return xloc;
 178 }
 179
 180 /* Initialize the set of cache used for files accessed by caret
 181    diagnostic.  */
 182
 183 static void
 184 diagnostic_file_cache_init (void)
 185 {
 186   if (fcache_tab == NULL)
 187     fcache_tab = new fcache[fcache_tab_size];
 188 }
 189
 190 /* Free the resources used by the set of cache used for files accessed
 191    by caret diagnostic.  */
 192
 193 void
 194 diagnostic_file_cache_fini (void)
 195 {
 196   if (fcache_tab)
 197     {
 198       delete [] (fcache_tab);
 199       fcache_tab = NULL;
 200     }
 201 }
 202
 203 /* Return the total lines number that have been read so far by the
 204    line map (in the preprocessor) so far.  For languages like C++ that
 205    entirely preprocess the input file before starting to parse, this
 206    equals the actual number of lines of the file.  */
 207
 208 static size_t
 209 total_lines_num (const char *file_path)
 210 {
 211   size_t r = 0;
 212   source_location l = 0;
 213   if (linemap_get_file_highest_location (line_table, file_path, &l))
 214     {
 215       gcc_assert (l >= RESERVED_LOCATION_COUNT);
 216       expanded_location xloc = expand_location (l);
 217       r = xloc.line;
 218     }
 219   return r;
 220 }
 221
 222 /* Lookup the cache used for the content of a given file accessed by
 223    caret diagnostic.  Return the found cached file, or NULL if no
 224    cached file was found.  */
 225
 226 static fcache*
 227 lookup_file_in_cache_tab (const char *file_path)
 228 {
 229   if (file_path == NULL)
 230     return NULL;
 231
 232   diagnostic_file_cache_init ();
 233
 234   /* This will contain the found cached file.  */
 235   fcache *r = NULL;
 236   for (unsigned i = 0; i < fcache_tab_size; ++i)
 237     {
 238       fcache *c = &fcache_tab[i];
 239       if (c->file_path && !strcmp (c->file_path, file_path))
 240         {
 241           ++c->use_count;
 242           r = c;
 243         }
 244     }
 245
 246   if (r)
 247     ++r->use_count;
 248
 249   return r;
 250 }
 251
 252 /* Return the file cache that has been less used, recently, or the
 253    first empty one.  If HIGHEST_USE_COUNT is non-null,
 254    *HIGHEST_USE_COUNT is set to the highest use count of the entries
 255    in the cache table.  */
 256
 257 static fcache*
 258 evicted_cache_tab_entry (unsigned *highest_use_count)
 259 {
 260   diagnostic_file_cache_init ();
 261
 262   fcache *to_evict = &fcache_tab[0];
 263   unsigned huc = to_evict->use_count;
 264   for (unsigned i = 1; i < fcache_tab_size; ++i)
 265     {
 266       fcache *c = &fcache_tab[i];
 267       bool c_is_empty = (c->file_path == NULL);
 268
 269       if (c->use_count < to_evict->use_count
 270           || (to_evict->file_path && c_is_empty))
 271         /* We evict C because it's either an entry with a lower use
 272            count or one that is empty.  */
 273         to_evict = c;
 274
 275       if (huc < c->use_count)
 276         huc = c->use_count;
 277
 278       if (c_is_empty)
 279         /* We've reached the end of the cache; subsequent elements are
 280            all empty.  */
 281         break;
 282     }
 283
 284   if (highest_use_count)
 285     *highest_use_count = huc;
 286
 287   return to_evict;
 288 }
 289
 290 /* Create the cache used for the content of a given file to be
 291    accessed by caret diagnostic.  This cache is added to an array of
 292    cache and can be retrieved by lookup_file_in_cache_tab.  This
 293    function returns the created cache.  Note that only the last
 294    fcache_tab_size files are cached.  */
 295
 296 static fcache*
 297 add_file_to_cache_tab (const char *file_path)
 298 {
 299
 300   FILE *fp = fopen (file_path, "r");
 301   if (fp == NULL)
 302     return NULL;
 303
 304   unsigned highest_use_count = 0;
 305   fcache *r = evicted_cache_tab_entry (&highest_use_count);
 306   r->file_path = file_path;
 307   if (r->fp)
 308     fclose (r->fp);
 309   r->fp = fp;
 310   r->nb_read = 0;
 311   r->line_start_idx = 0;
 312   r->line_num = 0;
 313   r->line_record.truncate (0);
 314   /* Ensure that this cache entry doesn't get evicted next time
 315      add_file_to_cache_tab is called.  */
 316   r->use_count = ++highest_use_count;
 317   r->total_lines = total_lines_num (file_path);
 318
 319   return r;
 320 }
 321
 322 /* Lookup the cache used for the content of a given file accessed by
 323    caret diagnostic.  If no cached file was found, create a new cache
 324    for this file, add it to the array of cached file and return
 325    it.  */
 326
 327 static fcache*
 328 lookup_or_add_file_to_cache_tab (const char *file_path)
 329 {
 330   fcache *r = lookup_file_in_cache_tab (file_path);
 331   if (r == NULL)
 332     r = add_file_to_cache_tab (file_path);
 333   return r;
 334 }
 335
 336 /* Default constructor for a cache of file used by caret
 337    diagnostic.  */
 338
 339 fcache::fcache ()
 340 : use_count (0), file_path (NULL), fp (NULL), data (0),
 341   size (0), nb_read (0), line_start_idx (0), line_num (0),
 342   total_lines (0)
 343 {
 344   line_record.create (0);
 345 }
 346
 347 /* Destructor for a cache of file used by caret diagnostic.  */
 348
 349 fcache::~fcache ()
 350 {
 351   if (fp)
 352     {
 353       fclose (fp);
 354       fp = NULL;
 355     }
 356   if (data)
 357     {
 358       XDELETEVEC (data);
 359       data = 0;
 360     }
 361   line_record.release ();
 362 }
 363
 364 /* Returns TRUE iff the cache would need to be filled with data coming
 365    from the file.  That is, either the cache is empty or full or the
 366    current line is empty.  Note that if the cache is full, it would
 367    need to be extended and filled again.  */
 368
 369 static bool
 370 needs_read (fcache *c)
 371 {
 372   return (c->nb_read == 0
 373           || c->nb_read == c->size
 374           || (c->line_start_idx >= c->nb_read - 1));
 375 }
 376
 377 /*  Return TRUE iff the cache is full and thus needs to be
 378     extended.  */
 379
 380 static bool
 381 needs_grow (fcache *c)
 382 {
 383   return c->nb_read == c->size;
 384 }
 385
 386 /* Grow the cache if it needs to be extended.  */
 387
 388 static void
 389 maybe_grow (fcache *c)
 390 {
 391   if (!needs_grow (c))
 392     return;
 393
 394   size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
 395   c->data = XRESIZEVEC (char, c->data, size + 1);
 396   c->size = size;
 397 }
 398
 399 /*  Read more data into the cache.  Extends the cache if need be.
 400     Returns TRUE iff new data could be read.  */
 401
 402 static bool
 403 read_data (fcache *c)
 404 {
 405   if (feof (c->fp) || ferror (c->fp))
 406     return false;
 407
 408   maybe_grow (c);
 409
 410   char * from = c->data + c->nb_read;
 411   size_t to_read = c->size - c->nb_read;
 412   size_t nb_read = fread (from, 1, to_read, c->fp);
 413
 414   if (ferror (c->fp))
 415     return false;
 416
 417   c->nb_read += nb_read;
 418   return !!nb_read;
 419 }
 420
 421 /* Read new data iff the cache needs to be filled with more data
 422    coming from the file FP.  Return TRUE iff the cache was filled with
 423    mode data.  */
 424
 425 static bool
 426 maybe_read_data (fcache *c)
 427 {
 428   if (!needs_read (c))
 429     return false;
 430   return read_data (c);
 431 }
 432
 433 /* Read a new line from file FP, using C as a cache for the data
 434    coming from the file.  Upon successful completion, *LINE is set to
 435    the beginning of the line found.  Space for that line has been
 436    allocated in the cache thus *LINE has the same life time as C.
 437    *LINE_LEN is set to the length of the line.  Note that the line
 438    does not contain any terminal delimiter.  This function returns
 439    true if some data was read or process from the cache, false
 440    otherwise.  Note that subsequent calls to get_next_line return the
 441    next lines of the file and might overwrite the content of
 442    *LINE.  */
 443
 444 static bool
 445 get_next_line (fcache *c, char **line, ssize_t *line_len)
 446 {
 447   /* Fill the cache with data to process.  */
 448   maybe_read_data (c);
 449
 450   size_t remaining_size = c->nb_read - c->line_start_idx;
 451   if (remaining_size == 0)
 452     /* There is no more data to process.  */
 453     return false;
 454
 455   char *line_start = c->data + c->line_start_idx;
 456
 457   char *next_line_start = NULL;
 458   size_t len = 0;
 459   char *line_end = (char *) memchr (line_start, '\n', remaining_size);
 460   if (line_end == NULL)
 461     {
 462       /* We haven't found the end-of-line delimiter in the cache.
 463          Fill the cache with more data from the file and look for the
 464          '\n'.  */
 465       while (maybe_read_data (c))
 466         {
 467           line_start = c->data + c->line_start_idx;
 468           remaining_size = c->nb_read - c->line_start_idx;
 469           line_end = (char *) memchr (line_start, '\n', remaining_size);
 470           if (line_end != NULL)
 471             {
 472               next_line_start = line_end + 1;
 473               break;
 474             }
 475         }
 476       if (line_end == NULL)
 477         /* We've loadded all the file into the cache and still no
 478            '\n'.  Let's say the line ends up at one byte passed the
 479            end of the file.  This is to stay consistent with the case
 480            of when the line ends up with a '\n' and line_end points to
 481            that terminal '\n'.  That consistency is useful below in
 482            the len calculation.  */
 483         line_end = c->data + c->nb_read ;
 484     }
 485   else
 486     next_line_start = line_end + 1;
 487
 488   if (ferror (c->fp))
 489     return -1;
 490
 491   /* At this point, we've found the end of the of line.  It either
 492      points to the '\n' or to one byte after the last byte of the
 493      file.  */
 494   gcc_assert (line_end != NULL);
 495
 496   len = line_end - line_start;
 497
 498   if (c->line_start_idx < c->nb_read)
 499     *line = line_start;
 500
 501   ++c->line_num;
 502
 503   /* Before we update our line record, make sure the hint about the
 504      total number of lines of the file is correct.  If it's not, then
 505      we give up recording line boundaries from now on.  */
 506   bool update_line_record = true;
 507   if (c->line_num > c->total_lines)
 508     update_line_record = false;
 509
 510     /* Now update our line record so that re-reading lines from the
 511      before c->line_start_idx is faster.  */
 512   if (update_line_record
 513       && c->line_record.length () < fcache_line_record_size)
 514     {
 515       /* If the file lines fits in the line record, we just record all
 516          its lines ...*/
 517       if (c->total_lines <= fcache_line_record_size
 518           && c->line_num > c->line_record.length ())
 519         c->line_record.safe_push (fcache::line_info (c->line_num,
 520                                                  c->line_start_idx,
 521                                                  line_end - c->data));
 522       else if (c->total_lines > fcache_line_record_size)
 523         {
 524           /* ... otherwise, we just scale total_lines down to
 525              (fcache_line_record_size lines.  */
 526           size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
 527           if (c->line_record.length () == 0
 528               || n >= c->line_record.length ())
 529             c->line_record.safe_push (fcache::line_info (c->line_num,
 530                                                      c->line_start_idx,
 531                                                      line_end - c->data));
 532         }
 533     }
 534
 535   /* Update c->line_start_idx so that it points to the next line to be
 536      read.  */
 537   if (next_line_start)
 538     c->line_start_idx = next_line_start - c->data;
 539   else
 540     /* We didn't find any terminal '\n'.  Let's consider that the end
 541        of line is the end of the data in the cache.  The next
 542        invocation of get_next_line will either read more data from the
 543        underlying file or return false early because we've reached the
 544        end of the file.  */
 545     c->line_start_idx = c->nb_read;
 546
 547   *line_len = len;
 548
 549   return true;
 550 }
 551
 552 /* Reads the next line from FILE into *LINE.  If *LINE is too small
 553    (or NULL) it is allocated (or extended) to have enough space to
 554    containe the line.  *LINE_LENGTH must contain the size of the
 555    initial*LINE buffer.  It's then updated by this function to the
 556    actual length of the returned line.  Note that the returned line
 557    can contain several zero bytes.  Also note that the returned string
 558    is allocated in static storage that is going to be re-used by
 559    subsequent invocations of read_line.  */
 560
 561 static bool
 562 read_next_line (fcache *cache, char ** line, ssize_t *line_len)
 563 {
 564   char *l = NULL;
 565   ssize_t len = 0;
 566
 567   if (!get_next_line (cache, &l, &len))
 568     return false;
 569
 570   if (*line == NULL)
 571     *line = XNEWVEC (char, len);
 572   else
 573     if (*line_len < len)
 574         *line = XRESIZEVEC (char, *line, len);
 575
 576   memcpy (*line, l, len);
 577   *line_len = len;
 578
 579   return true;
 580 }
 581
 582 /* Consume the next bytes coming from the cache (or from its
 583    underlying file if there are remaining unread bytes in the file)
 584    until we reach the next end-of-line (or end-of-file).  There is no
 585    copying from the cache involved.  Return TRUE upon successful
 586    completion.  */
 587
 588 static bool
 589 goto_next_line (fcache *cache)
 590 {
 591   char *l;
 592   ssize_t len;
 593
 594   return get_next_line (cache, &l, &len);
 595 }
 596
 597 /* Read an arbitrary line number LINE_NUM from the file cached in C.
 598    The line is copied into *LINE.  *LINE_LEN must have been set to the
 599    length of *LINE.  If *LINE is too small (or NULL) it's extended (or
 600    allocated) and *LINE_LEN is adjusted accordingly.  *LINE ends up
 601    with a terminal zero byte and can contain additional zero bytes.
 602    This function returns bool if a line was read.  */
 603
 604 static bool
 605 read_line_num (fcache *c, size_t line_num,
 606                char ** line, ssize_t *line_len)
 607 {
 608   gcc_assert (line_num > 0);
 609
 610   if (line_num <= c->line_num)
 611     {
 612       /* We've been asked to read lines that are before c->line_num.
 613          So lets use our line record (if it's not empty) to try to
 614          avoid re-reading the file from the beginning again.  */
 615
 616       if (c->line_record.is_empty ())
 617         {
 618           c->line_start_idx = 0;
 619           c->line_num = 0;
 620         }
 621       else
 622         {
 623           fcache::line_info *i = NULL;
 624           if (c->total_lines <= fcache_line_record_size)
 625             {
 626               /* In languages where the input file is not totally
 627                  preprocessed up front, the c->total_lines hint
 628                  can be smaller than the number of lines of the
 629                  file.  In that case, only the first
 630                  c->total_lines have been recorded.
 631
 632                  Otherwise, the first c->total_lines we've read have
 633                  their start/end recorded here.  */
 634               i = (line_num <= c->total_lines)
 635                 ? &c->line_record[line_num - 1]
 636                 : &c->line_record[c->total_lines - 1];
 637               gcc_assert (i->line_num <= line_num);
 638             }
 639           else
 640             {
 641               /*  So the file had more lines than our line record
 642                   size.  Thus the number of lines we've recorded has
 643                   been scaled down to fcache_line_reacord_size.  Let's
 644                   pick the start/end of the recorded line that is
 645                   closest to line_num.  */
 646               size_t n = (line_num <= c->total_lines)
 647                 ? line_num * fcache_line_record_size / c->total_lines
 648                 : c ->line_record.length () - 1;
 649               if (n < c->line_record.length ())
 650                 {
 651                   i = &c->line_record[n];
 652                   gcc_assert (i->line_num <= line_num);
 653                 }
 654             }
 655
 656           if (i && i->line_num == line_num)
 657             {
 658               /* We have the start/end of the line.  Let's just copy
 659                  it again and we are done.  */
 660               ssize_t len = i->end_pos - i->start_pos + 1;
 661               if (*line_len < len)
 662                 *line = XRESIZEVEC (char, *line, len);
 663               memmove (*line, c->data + i->start_pos, len);
 664               (*line)[len - 1] = '\0';
 665               *line_len = --len;
 666               return true;
 667             }
 668
 669           if (i)
 670             {
 671               c->line_start_idx = i->start_pos;
 672               c->line_num = i->line_num - 1;
 673             }
 674           else
 675             {
 676               c->line_start_idx = 0;
 677               c->line_num = 0;
 678             }
 679         }
 680     }
 681
 682   /*  Let's walk from line c->line_num up to line_num - 1, without
 683       copying any line.  */
 684   while (c->line_num < line_num - 1)
 685     if (!goto_next_line (c))
 686       return false;
 687
 688   /* The line we want is the next one.  Let's read and copy it back to
 689      the caller.  */
 690   return read_next_line (c, line, line_len);
 691 }
 692
 693 /* Return the physical source line that corresponds to FILE_PATH/LINE in a
 694    buffer that is statically allocated.  The newline is replaced by
 695    the null character.  Note that the line can contain several null
 696    characters, so LINE_LEN, if non-null, points to the actual length
 697    of the line.  */
 698
 699 const char *
 700 location_get_source_line (const char *file_path, int line,
 701                           int *line_len)
 702 {
 703   static char *buffer;
 704   static ssize_t len;
 705
 706   if (line == 0)
 707     return NULL;
 708
 709   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
 710   if (c == NULL)
 711     return NULL;
 712
 713   bool read = read_line_num (c, line, &buffer, &len);
 714
 715   if (read && line_len)
 716     *line_len = len;
 717
 718   return read ? buffer : NULL;
 719 }
 720
 721 /* Test if the location originates from the spelling location of a
 722    builtin-tokens.  That is, return TRUE if LOC is a (possibly
 723    virtual) location of a built-in token that appears in the expansion
 724    list of a macro.  Please note that this function also works on
 725    tokens that result from built-in tokens.  For instance, the
 726    function would return true if passed a token "4" that is the result
 727    of the expansion of the built-in __LINE__ macro.  */
 728 bool
 729 is_location_from_builtin_token (source_location loc)
 730 {
 731   const line_map_ordinary *map = NULL;
 732   loc = linemap_resolve_location (line_table, loc,
 733                                   LRK_SPELLING_LOCATION, &map);
 734   return loc == BUILTINS_LOCATION;
 735 }
 736
 737 /* Expand the source location LOC into a human readable location.  If
 738    LOC is virtual, it resolves to the expansion point of the involved
 739    macro.  If LOC resolves to a builtin location, the file name of the
 740    readable location is set to the string "<built-in>".  */
 741
 742 expanded_location
 743 expand_location (source_location loc)
 744 {
 745   return expand_location_1 (loc, /*expansion_point_p=*/true);
 746 }
 747
 748 /* Expand the source location LOC into a human readable location.  If
 749    LOC is virtual, it resolves to the expansion location of the
 750    relevant macro.  If LOC resolves to a builtin location, the file
 751    name of the readable location is set to the string
 752    "<built-in>".  */
 753
 754 expanded_location
 755 expand_location_to_spelling_point (source_location loc)
 756 {
 757   return expand_location_1 (loc, /*expansion_point_p=*/false);
 758 }
 759
 760 /* The rich_location class within libcpp requires a way to expand
 761    source_location instances, and relies on the client code
 762    providing a symbol named
 763      linemap_client_expand_location_to_spelling_point
 764    to do this.
 765
 766    This is the implementation for libcommon.a (all host binaries),
 767    which simply calls into expand_location_to_spelling_point.  */
 768
 769 expanded_location
 770 linemap_client_expand_location_to_spelling_point (source_location loc)
 771 {
 772   return expand_location_to_spelling_point (loc);
 773 }
 774
 775
 776 /* If LOCATION is in a system header and if it is a virtual location for
 777    a token coming from the expansion of a macro, unwind it to the
 778    location of the expansion point of the macro.  Otherwise, just return
 779    LOCATION.
 780
 781    This is used for instance when we want to emit diagnostics about a
 782    token that may be located in a macro that is itself defined in a
 783    system header, for example, for the NULL macro.  In such a case, if
 784    LOCATION were passed directly to diagnostic functions such as
 785    warning_at, the diagnostic would be suppressed (unless
 786    -Wsystem-headers).  */
 787
 788 source_location
 789 expansion_point_location_if_in_system_header (source_location location)
 790 {
 791   if (in_system_header_at (location))
 792     location = linemap_resolve_location (line_table, location,
 793                                          LRK_MACRO_EXPANSION_POINT,
 794                                          NULL);
 795   return location;
 796 }
 797
 798 /* If LOCATION is a virtual location for a token coming from the expansion
 799    of a macro, unwind to the location of the expansion point of the macro.  */
 800
 801 source_location
 802 expansion_point_location (source_location location)
 803 {
 804   return linemap_resolve_location (line_table, location,
 805                                    LRK_MACRO_EXPANSION_POINT, NULL);
 806 }
 807
 808 /* Given location LOC, strip away any packed range information
 809    or ad-hoc information.  */
 810
 811 location_t
 812 get_pure_location (location_t loc)
 813 {
 814   if (IS_ADHOC_LOC (loc))
 815     loc
 816       = line_table->location_adhoc_data_map.data[loc & MAX_SOURCE_LOCATION].locus;
 817
 818   if (loc >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
 819     return loc;
 820
 821   if (loc < RESERVED_LOCATION_COUNT)
 822     return loc;
 823
 824   const line_map *map = linemap_lookup (line_table, loc);
 825   const line_map_ordinary *ordmap = linemap_check_ordinary (map);
 826
 827   return loc & ~((1 << ordmap->m_range_bits) - 1);
 828 }
 829
 830 /* Construct a location with caret at CARET, ranging from START to
 831    finish e.g.
 832
 833                  11111111112
 834         12345678901234567890
 835      522
 836      523   return foo + bar;
 837                   ~~~~^~~~~
 838      524
 839
 840    The location's caret is at the "+", line 523 column 15, but starts
 841    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
 842    of "bar" at column 19.  */
 843
 844 location_t
 845 make_location (location_t caret, location_t start, location_t finish)
 846 {
 847   location_t pure_loc = get_pure_location (caret);
 848   source_range src_range;
 849   src_range.m_start = start;
 850   src_range.m_finish = finish;
 851   location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
 852                                                    pure_loc,
 853                                                    src_range,
 854                                                    NULL);
 855   return combined_loc;
 856 }
 857
 858 #define ONE_K 1024
 859 #define ONE_M (ONE_K * ONE_K)
 860
 861 /* Display a number as an integer multiple of either:
 862    - 1024, if said integer is >= to 10 K (in base 2)
 863    - 1024 * 1024, if said integer is >= 10 M in (base 2)
 864  */
 865 #define SCALE(x) ((unsigned long) ((x) < 10 * ONE_K \
 866                   ? (x) \
 867                   : ((x) < 10 * ONE_M \
 868                      ? (x) / ONE_K \
 869                      : (x) / ONE_M)))
 870
 871 /* For a given integer, display either:
 872    - the character 'k', if the number is higher than 10 K (in base 2)
 873      but strictly lower than 10 M (in base 2)
 874    - the character 'M' if the number is higher than 10 M (in base2)
 875    - the charcter ' ' if the number is strictly lower  than 10 K  */
 876 #define STAT_LABEL(x) ((x) < 10 * ONE_K ? ' ' : ((x) < 10 * ONE_M ? 'k' : 'M'))
 877
 878 /* Display an integer amount as multiple of 1K or 1M (in base 2).
 879    Display the correct unit (either k, M, or ' ') after the amout, as
 880    well.  */
 881 #define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
 882
 883 /* Dump statistics to stderr about the memory usage of the line_table
 884    set of line maps.  This also displays some statistics about macro
 885    expansion.  */
 886
 887 void
 888 dump_line_table_statistics (void)
 889 {
 890   struct linemap_stats s;
 891   long total_used_map_size,
 892     macro_maps_size,
 893     total_allocated_map_size;
 894
 895   memset (&s, 0, sizeof (s));
 896
 897   linemap_get_statistics (line_table, &s);
 898
 899   macro_maps_size = s.macro_maps_used_size
 900     + s.macro_maps_locations_size;
 901
 902   total_allocated_map_size = s.ordinary_maps_allocated_size
 903     + s.macro_maps_allocated_size
 904     + s.macro_maps_locations_size;
 905
 906   total_used_map_size = s.ordinary_maps_used_size
 907     + s.macro_maps_used_size
 908     + s.macro_maps_locations_size;
 909
 910   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
 911            s.num_expanded_macros);
 912   if (s.num_expanded_macros != 0)
 913     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
 914              s.num_macro_tokens / s.num_expanded_macros);
 915   fprintf (stderr,
 916            "\nLine Table allocations during the "
 917            "compilation process\n");
 918   fprintf (stderr, "Number of ordinary maps used:        %5ld%c\n",
 919            SCALE (s.num_ordinary_maps_used),
 920            STAT_LABEL (s.num_ordinary_maps_used));
 921   fprintf (stderr, "Ordinary map used size:              %5ld%c\n",
 922            SCALE (s.ordinary_maps_used_size),
 923            STAT_LABEL (s.ordinary_maps_used_size));
 924   fprintf (stderr, "Number of ordinary maps allocated:   %5ld%c\n",
 925            SCALE (s.num_ordinary_maps_allocated),
 926            STAT_LABEL (s.num_ordinary_maps_allocated));
 927   fprintf (stderr, "Ordinary maps allocated size:        %5ld%c\n",
 928            SCALE (s.ordinary_maps_allocated_size),
 929            STAT_LABEL (s.ordinary_maps_allocated_size));
 930   fprintf (stderr, "Number of macro maps used:           %5ld%c\n",
 931            SCALE (s.num_macro_maps_used),
 932            STAT_LABEL (s.num_macro_maps_used));
 933   fprintf (stderr, "Macro maps used size:                %5ld%c\n",
 934            SCALE (s.macro_maps_used_size),
 935            STAT_LABEL (s.macro_maps_used_size));
 936   fprintf (stderr, "Macro maps locations size:           %5ld%c\n",
 937            SCALE (s.macro_maps_locations_size),
 938            STAT_LABEL (s.macro_maps_locations_size));
 939   fprintf (stderr, "Macro maps size:                     %5ld%c\n",
 940            SCALE (macro_maps_size),
 941            STAT_LABEL (macro_maps_size));
 942   fprintf (stderr, "Duplicated maps locations size:      %5ld%c\n",
 943            SCALE (s.duplicated_macro_maps_locations_size),
 944            STAT_LABEL (s.duplicated_macro_maps_locations_size));
 945   fprintf (stderr, "Total allocated maps size:           %5ld%c\n",
 946            SCALE (total_allocated_map_size),
 947            STAT_LABEL (total_allocated_map_size));
 948   fprintf (stderr, "Total used maps size:                %5ld%c\n",
 949            SCALE (total_used_map_size),
 950            STAT_LABEL (total_used_map_size));
 951   fprintf (stderr, "Ad-hoc table size:                   %5ld%c\n",
 952            SCALE (s.adhoc_table_size),
 953            STAT_LABEL (s.adhoc_table_size));
 954   fprintf (stderr, "Ad-hoc table entries used:           %5ld\n",
 955            s.adhoc_table_entries_used);
 956   fprintf (stderr, "optimized_ranges: %i\n",
 957            line_table->num_optimized_ranges);
 958   fprintf (stderr, "unoptimized_ranges: %i\n",
 959            line_table->num_unoptimized_ranges);
 960
 961   fprintf (stderr, "\n");
 962 }
 963
 964 /* Get location one beyond the final location in ordinary map IDX.  */
 965
 966 static source_location
 967 get_end_location (struct line_maps *set, unsigned int idx)
 968 {
 969   if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
 970     return set->highest_location;
 971
 972   struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
 973   return MAP_START_LOCATION (next_map);
 974 }
 975
 976 /* Helper function for write_digit_row.  */
 977
 978 static void
 979 write_digit (FILE *stream, int digit)
 980 {
 981   fputc ('0' + (digit % 10), stream);
 982 }
 983
 984 /* Helper function for dump_location_info.
 985    Write a row of numbers to STREAM, numbering a source line,
 986    giving the units, tens, hundreds etc of the column number.  */
 987
 988 static void
 989 write_digit_row (FILE *stream, int indent,
 990                  const line_map_ordinary *map,
 991                  source_location loc, int max_col, int divisor)
 992 {
 993   fprintf (stream, "%*c", indent, ' ');
 994   fprintf (stream, "|");
 995   for (int column = 1; column < max_col; column++)
 996     {
 997       source_location column_loc = loc + (column << map->m_range_bits);
 998       write_digit (stream, column_loc / divisor);
 999     }
1000   fprintf (stream, "\n");
1001 }
1002
/* Write a half-open interval of source_location to STREAM as a single
   indented line: START is included in the interval, END is not.  */

static void
dump_location_range (FILE *stream,
                     source_location start, source_location end)
{
  /* Both bounds are printed as unsigned values.  */
  fprintf (stream,
           "  source_location interval: %u <= loc < %u\n",
           start, end);
}
1014
1015 /* Write a labelled description of a half-closed (START) / half-open (END)
1016    interval of source_location to STREAM.  */
1017
1018 static void
1019 dump_labelled_location_range (FILE *stream,
1020                               const char *name,
1021                               source_location start, source_location end)
1022 {
1023   fprintf (stream, "%s\n", name);
1024   dump_location_range (stream, start, end);
1025   fprintf (stream, "\n");
1026 }
1027
1028 /* Write a visualization of the locations in the line_table to STREAM.  */
1029
1030 void
1031 dump_location_info (FILE *stream)
1032 {
1033   /* Visualize the reserved locations.  */
1034   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1035                                 0, RESERVED_LOCATION_COUNT);
1036
1037   /* Visualize the ordinary line_map instances, rendering the sources. */
1038   for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1039     {
1040       source_location end_location = get_end_location (line_table, idx);
1041       /* half-closed: doesn't include this one. */
1042
1043       const line_map_ordinary *map
1044         = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1045       fprintf (stream, "ORDINARY MAP: %i\n", idx);
1046       dump_location_range (stream,
1047                            MAP_START_LOCATION (map), end_location);
1048       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1049       fprintf (stream, "  starting at line: %i\n",
1050                ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1051       fprintf (stream, "  column and range bits: %i\n",
1052                map->m_column_and_range_bits);
1053       fprintf (stream, "  column bits: %i\n",
1054                map->m_column_and_range_bits - map->m_range_bits);
1055       fprintf (stream, "  range bits: %i\n",
1056                map->m_range_bits);
1057
1058       /* Render the span of source lines that this "map" covers.  */
1059       for (source_location loc = MAP_START_LOCATION (map);
1060            loc < end_location;
1061            loc += (1 << map->m_range_bits) )
1062         {
1063           gcc_assert (pure_location_p (line_table, loc) );
1064
1065           expanded_location exploc
1066             = linemap_expand_location (line_table, map, loc);
1067
1068           if (0 == exploc.column)
1069             {
1070               /* Beginning of a new source line: draw the line.  */
1071
1072               int line_size;
1073               const char *line_text = location_get_source_line (exploc.file,
1074                                                                 exploc.line,
1075                                                                 &line_size);
1076               if (!line_text)
1077                 break;
1078               fprintf (stream,
1079                        "%s:%3i|loc:%5i|%.*s\n",
1080                        exploc.file, exploc.line,
1081                        loc,
1082                        line_size, line_text);
1083
1084               /* "loc" is at column 0, which means "the whole line".
1085                  Render the locations *within* the line, by underlining
1086                  it, showing the source_location numeric values
1087                  at each column.  */
1088               int max_col = (1 << map->m_column_and_range_bits) - 1;
1089               if (max_col > line_size)
1090                 max_col = line_size + 1;
1091
1092               int indent = 14 + strlen (exploc.file);
1093
1094               /* Thousands.  */
1095               if (end_location > 999)
1096                 write_digit_row (stream, indent, map, loc, max_col, 1000);
1097
1098               /* Hundreds.  */
1099               if (end_location > 99)
1100                 write_digit_row (stream, indent, map, loc, max_col, 100);
1101
1102               /* Tens.  */
1103               write_digit_row (stream, indent, map, loc, max_col, 10);
1104
1105               /* Units.  */
1106               write_digit_row (stream, indent, map, loc, max_col, 1);
1107             }
1108         }
1109       fprintf (stream, "\n");
1110     }
1111
1112   /* Visualize unallocated values.  */
1113   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1114                                 line_table->highest_location,
1115                                 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1116
1117   /* Visualize the macro line_map instances, rendering the sources. */
1118   for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1119     {
1120       /* Each macro map that is allocated owns source_location values
1121          that are *lower* that the one before them.
1122          Hence it's meaningful to view them either in order of ascending
1123          source locations, or in order of ascending macro map index.  */
1124       const bool ascending_source_locations = true;
1125       unsigned int idx = (ascending_source_locations
1126                           ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1127                           : i);
1128       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1129       fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1130                idx,
1131                linemap_map_get_macro_name (map),
1132                MACRO_MAP_NUM_MACRO_TOKENS (map));
1133       dump_location_range (stream,
1134                            map->start_location,
1135                            (map->start_location
1136                             + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1137       inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1138               "expansion point is location %i",
1139               MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1140       fprintf (stream, "  map->start_location: %u\n",
1141                map->start_location);
1142
1143       fprintf (stream, "  macro_locations:\n");
1144       for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1145         {
1146           source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1147           source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1148
1149           /* linemap_add_macro_token encodes token numbers in an expansion
1150              by putting them after MAP_START_LOCATION. */
1151
1152           /* I'm typically seeing 4 uninitialized entries at the end of
1153              0xafafafaf.
1154              This appears to be due to macro.c:replace_args
1155              adding 2 extra args for padding tokens; presumably there may
1156              be a leading and/or trailing padding token injected,
1157              each for 2 more location slots.
1158              This would explain there being up to 4 source_locations slots
1159              that may be uninitialized.  */
1160
1161           fprintf (stream, "    %u: %u, %u\n",
1162                    i,
1163                    x,
1164                    y);
1165           if (x == y)
1166             {
1167               if (x < MAP_START_LOCATION (map))
1168                 inform (x, "token %u has x-location == y-location == %u", i, x);
1169               else
1170                 fprintf (stream,
1171                          "x-location == y-location == %u encodes token # %u\n",
1172                          x, x - MAP_START_LOCATION (map));
1173                 }
1174           else
1175             {
1176               inform (x, "token %u has x-location == %u", i, x);
1177               inform (x, "token %u has y-location == %u", i, y);
1178             }
1179         }
1180       fprintf (stream, "\n");
1181     }
1182
1183   /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1184      macro map, presumably due to an off-by-one error somewhere
1185      between the logic in linemap_enter_macro and
1186      LINEMAPS_MACRO_LOWEST_LOCATION.  */
1187   dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1188                                 MAX_SOURCE_LOCATION,
1189                                 MAX_SOURCE_LOCATION + 1);
1190
1191   /* Visualize ad-hoc values.  */
1192   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1193                                 MAX_SOURCE_LOCATION + 1, UINT_MAX);
1194 }
1195
1196 /* string_concat's constructor.  */
1197
1198 string_concat::string_concat (int num, location_t *locs)
1199   : m_num (num)
1200 {
1201   m_locs = ggc_vec_alloc <location_t> (num);
1202   for (int i = 0; i < num; i++)
1203     m_locs[i] = locs[i];
1204 }
1205
1206 /* string_concat_db's constructor.  */
1207
1208 string_concat_db::string_concat_db ()
1209 {
1210   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1211 }
1212
1213 /* Record that a string concatenation occurred, covering NUM
1214    string literal tokens.  LOCS is an array of size NUM, containing the
1215    locations of the tokens.  A copy of LOCS is taken.  */
1216
1217 void
1218 string_concat_db::record_string_concatenation (int num, location_t *locs)
1219 {
1220   gcc_assert (num > 1);
1221   gcc_assert (locs);
1222
1223   location_t key_loc = get_key_loc (locs[0]);
1224
1225   string_concat *concat
1226     = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1227   m_table->put (key_loc, concat);
1228 }
1229
1230 /* Determine if LOC was the location of the the initial token of a
1231    concatenation of string literal tokens.
1232    If so, *OUT_NUM is written to with the number of tokens, and
1233    *OUT_LOCS with the location of an array of locations of the
1234    tokens, and return true.  *OUT_LOCS is a borrowed pointer to
1235    storage owned by the string_concat_db.
1236    Otherwise, return false.  */
1237
1238 bool
1239 string_concat_db::get_string_concatenation (location_t loc,
1240                                             int *out_num,
1241                                             location_t **out_locs)
1242 {
1243   gcc_assert (out_num);
1244   gcc_assert (out_locs);
1245
1246   location_t key_loc = get_key_loc (loc);
1247
1248   string_concat **concat = m_table->get (key_loc);
1249   if (!concat)
1250     return false;
1251
1252   *out_num = (*concat)->m_num;
1253   *out_locs =(*concat)->m_locs;
1254   return true;
1255 }
1256
1257 /* Internal function.  Canonicalize LOC into a form suitable for
1258    use as a key within the database, stripping away macro expansion,
1259    ad-hoc information, and range information, using the location of
1260    the start of LOC within an ordinary linemap.  */
1261
1262 location_t
1263 string_concat_db::get_key_loc (location_t loc)
1264 {
1265   loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1266                                   NULL);
1267
1268   loc = get_range_from_loc (line_table, loc).m_start;
1269
1270   return loc;
1271 }
1272
1273 /* Helper class for use within get_substring_ranges_for_loc.
1274    An vec of cpp_string with responsibility for releasing all of the
1275    str->text for each str in the vector.  */
1276
1277 class auto_cpp_string_vec :  public auto_vec <cpp_string>
1278 {
1279  public:
1280   auto_cpp_string_vec (int alloc)
1281     : auto_vec <cpp_string> (alloc) {}
1282
1283   ~auto_cpp_string_vec ()
1284   {
1285     /* Clean up the copies within this vec.  */
1286     int i;
1287     cpp_string *str;
1288     FOR_EACH_VEC_ELT (*this, i, str)
1289       free (const_cast <unsigned char *> (str->text));
1290   }
1291 };
1292
1293 /* Attempt to populate RANGES with source location information on the
1294    individual characters within the string literal found at STRLOC.
1295    If CONCATS is non-NULL, then any string literals that the token at
1296    STRLOC  was concatenated with are also added to RANGES.
1297
1298    Return NULL if successful, or an error message if any errors occurred (in
1299    which case RANGES may be only partially populated and should not
1300    be used).
1301
1302    This is implemented by re-parsing the relevant source line(s).  */
1303
1304 static const char *
1305 get_substring_ranges_for_loc (cpp_reader *pfile,
1306                               string_concat_db *concats,
1307                               location_t strloc,
1308                               enum cpp_ttype type,
1309                               cpp_substring_ranges &ranges)
1310 {
1311   gcc_assert (pfile);
1312
1313   if (strloc == UNKNOWN_LOCATION)
1314     return "unknown location";
1315
1316   /* If string concatenation has occurred at STRLOC, get the locations
1317      of all of the literal tokens making up the compound string.
1318      Otherwise, just use STRLOC.  */
1319   int num_locs = 1;
1320   location_t *strlocs = &strloc;
1321   if (concats)
1322     concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1323
1324   auto_cpp_string_vec strs (num_locs);
1325   auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1326   for (int i = 0; i < num_locs; i++)
1327     {
1328       /* Get range of strloc.  We will use it to locate the start and finish
1329          of the literal token within the line.  */
1330       source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1331
1332       if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1333         /* If the string is within a macro expansion, we can't get at the
1334            end location.  */
1335         return "macro expansion";
1336
1337       if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1338         /* If so, we can't reliably determine where the token started within
1339            its line.  */
1340         return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1341
1342       if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1343         /* If so, we can't reliably determine where the token finished within
1344            its line.  */
1345         return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1346
1347       expanded_location start
1348         = expand_location_to_spelling_point (src_range.m_start);
1349       expanded_location finish
1350         = expand_location_to_spelling_point (src_range.m_finish);
1351       if (start.file != finish.file)
1352         return "range endpoints are in different files";
1353       if (start.line != finish.line)
1354         return "range endpoints are on different lines";
1355       if (start.column > finish.column)
1356         return "range endpoints are reversed";
1357
1358       int line_width;
1359       const char *line = location_get_source_line (start.file, start.line,
1360                                                    &line_width);
1361       if (line == NULL)
1362         return "unable to read source line";
1363
1364       /* Determine the location of the literal (including quotes
1365          and leading prefix chars, such as the 'u' in a u""
1366          token).  */
1367       const char *literal = line + start.column - 1;
1368       int literal_length = finish.column - start.column + 1;
1369
1370       gcc_assert (line_width >= (start.column - 1 + literal_length));
1371       cpp_string from;
1372       from.len = literal_length;
1373       /* Make a copy of the literal, to avoid having to rely on
1374          the lifetime of the copy of the line within the cache.
1375          This will be released by the auto_cpp_string_vec dtor.  */
1376       from.text = XDUPVEC (unsigned char, literal, literal_length);
1377       strs.safe_push (from);
1378
1379       /* For very long lines, a new linemap could have started
1380          halfway through the token.
1381          Ensure that the loc_reader uses the linemap of the
1382          *end* of the token for its start location.  */
1383       const line_map_ordinary *final_ord_map;
1384       linemap_resolve_location (line_table, src_range.m_finish,
1385                                 LRK_MACRO_EXPANSION_POINT, &final_ord_map);
1386       location_t start_loc
1387         = linemap_position_for_line_and_column (line_table, final_ord_map,
1388                                                 start.line, start.column);
1389
1390       cpp_string_location_reader loc_reader (start_loc, line_table);
1391       loc_readers.safe_push (loc_reader);
1392     }
1393
1394   /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
1395   const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1396                                                  loc_readers.address (),
1397                                                  num_locs, &ranges, type);
1398   if (err)
1399     return err;
1400
1401   /* Success: "ranges" should now contain information on the string.  */
1402   return NULL;
1403 }
1404
1405 /* Attempt to populate *OUT_RANGE with source location information on the
1406    range of given characters within the string literal found at STRLOC.
1407    START_IDX and END_IDX refer to offsets within the execution character
1408    set.
1409    If CONCATS is non-NULL, then any string literals that the token at
1410    STRLOC was concatenated with are also considered.
1411
1412    This is implemented by re-parsing the relevant source line(s).
1413
1414    Return NULL if successful, or an error message if any errors occurred.
1415    Error messages are intended for GCC developers (to help debugging) rather
1416    than for end-users.  */
1417
1418 const char *
1419 get_source_range_for_substring (cpp_reader *pfile,
1420                                 string_concat_db *concats,
1421                                 location_t strloc,
1422                                 enum cpp_ttype type,
1423                                 int start_idx, int end_idx,
1424                                 source_range *out_range)
1425 {
1426   gcc_checking_assert (start_idx >= 0);
1427   gcc_checking_assert (end_idx >= 0);
1428   gcc_assert (out_range);
1429
1430   cpp_substring_ranges ranges;
1431   const char *err
1432     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1433   if (err)
1434     return err;
1435
1436   if (start_idx >= ranges.get_num_ranges ())
1437     return "start_idx out of range";
1438   if (end_idx >= ranges.get_num_ranges ())
1439     return "end_idx out of range";
1440
1441   out_range->m_start = ranges.get_range (start_idx).m_start;
1442   out_range->m_finish = ranges.get_range (end_idx).m_finish;
1443   return NULL;
1444 }
1445
1446 /* As get_source_range_for_substring, but write to *OUT the number
1447    of ranges that are available.  */
1448
1449 const char *
1450 get_num_source_ranges_for_substring (cpp_reader *pfile,
1451                                      string_concat_db *concats,
1452                                      location_t strloc,
1453                                      enum cpp_ttype type,
1454                                      int *out)
1455 {
1456   gcc_assert (out);
1457
1458   cpp_substring_ranges ranges;
1459   const char *err
1460     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1461
1462   if (err)
1463     return err;
1464
1465   *out = ranges.get_num_ranges ();
1466   return NULL;
1467 }
1468
1469 #if CHECKING_P
1470
1471 namespace selftest {
1472
1473 /* Selftests of location handling.  */
1474
1475 /* A class for writing out a temporary sourcefile for use in selftests
1476    of input handling.  */
1477
1478 class temp_source_file
1479 {
1480  public:
1481   temp_source_file (const location &loc, const char *suffix,
1482                     const char *content);
1483   ~temp_source_file ();
1484
1485   const char *get_filename () const { return m_filename; }
1486
1487  private:
1488   char *m_filename;
1489 };
1490
1491 /* Constructor.  Create a tempfile using SUFFIX, and write CONTENT to
1492    it.  Abort if anything goes wrong, using LOC as the effective
1493    location in the problem report.  */
1494
1495 temp_source_file::temp_source_file (const location &loc, const char *suffix,
1496                                     const char *content)
1497 {
1498   m_filename = make_temp_file (suffix);
1499   ASSERT_NE (m_filename, NULL);
1500
1501   FILE *out = fopen (m_filename, "w");
1502   if (!out)
1503     ::selftest::fail_formatted (loc, "unable to open tempfile: %s",
1504                                 m_filename);
1505   fprintf (out, "%s", content);
1506   fclose (out);
1507 }
1508
/* Destructor.  Delete the tempfile and release the storage holding
   its name.  */

temp_source_file::~temp_source_file ()
{
  /* The name is needed to remove the file, so free it last.  */
  unlink (m_filename);
  free (m_filename);
}
1516
1517 /* Helper function for verifying location data: when location_t
1518    values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1519    as having column 0.  */
1520
1521 static bool
1522 should_have_column_data_p (location_t loc)
1523 {
1524   if (IS_ADHOC_LOC (loc))
1525     loc = get_location_from_adhoc_loc (line_table, loc);
1526   if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1527     return false;
1528   return true;
1529 }
1530
/* Selftest for should_have_column_data_p: check a low location and
   both sides of the LINE_MAP_MAX_LOCATION_WITH_COLS threshold.  */

static void
test_should_have_column_data_p ()
{
  ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
  /* The threshold itself still has column data...  */
  ASSERT_TRUE
    (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
  /* ...but the value just above it does not.  */
  ASSERT_FALSE
    (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
}
1542
1543 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1544    on LOC.  */
1545
1546 static void
1547 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1548               location_t loc)
1549 {
1550   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1551   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1552   /* If location_t values are sufficiently high, then column numbers
1553      will be unavailable and LOCATION_COLUMN (loc) will be 0.
1554      When close to the threshold, column numbers *may* be present: if
1555      the final linemap before the threshold contains a line that straddles
1556      the threshold, locations in that line have column information.  */
1557   if (should_have_column_data_p (loc))
1558     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1559 }
1560
/* Various selftests in this file involve constructing a line table
   and one or more line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
   and others use zero
   - the fallback modes within line-map.c: there are various threshold
   values for source_location/location_t beyond which line-map.c changes
   behavior (disabling of the range-packing optimization, disabling
   of column-tracking).  We can exercise these by starting the line_table
   at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix.  */

struct line_table_case
{
  line_table_case (int default_range_bits, int base_location)
  : m_default_range_bits (default_range_bits),
    m_base_location (base_location)
  {}

  /* Copied into line_table->default_range_bits by temp_line_table.  */
  int m_default_range_bits;

  /* If non-zero, temp_line_table starts the line_table's
     highest_location and highest_line at this value.  */
  int m_base_location;
};
1587
/* A class for overriding the global "line_table" within a selftest,
   restoring its value afterwards.  The constructor installs a fresh
   line_table set up as described by a line_table_case; the destructor
   puts the previous one back.  */

class temp_line_table
{
 public:
  temp_line_table (const line_table_case &);
  ~temp_line_table ();

 private:
  /* The value of line_table at construction time, restored by the
     destructor.  */
  line_maps *m_old_line_table;
};
1600
1601 /* Constructor.  Store the old value of line_table, and create a new
1602    one, using the sitation described in CASE_.  */
1603
1604 temp_line_table::temp_line_table (const line_table_case &case_)
1605   : m_old_line_table (line_table)
1606 {
1607   line_table = ggc_alloc<line_maps> ();
1608   linemap_init (line_table, BUILTINS_LOCATION);
1609   line_table->reallocator = m_old_line_table->reallocator;
1610   line_table->round_alloc_size = m_old_line_table->round_alloc_size;
1611   line_table->default_range_bits = case_.m_default_range_bits;
1612   if (case_.m_base_location)
1613     {
1614       line_table->highest_location = case_.m_base_location;
1615       line_table->highest_line = case_.m_base_location;
1616     }
1617 }
1618
/* Destructor.  Restore the old value of line_table, as saved by the
   constructor.  */

temp_line_table::~temp_line_table ()
{
  line_table = m_old_line_table;
}
1625
/* Verify basic operation of ordinary linemaps, using a temporary
   line_table set up as described by CASE_: build locations in two
   files, check that file/line/column can be recovered from them, and
   check that a range built with make_location round-trips.  */

static void
test_accessing_ordinary_linemaps (const line_table_case &case_)
{
  /* Replace the global line_table for the duration of this test.  */
  temp_line_table tmp_lt (case_);

  /* Build a simple linemap describing some locations. */
  linemap_add (line_table, LC_ENTER, false, "foo.c", 0);

  /* Two locations on line 1.  */
  linemap_line_start (line_table, 1, 100);
  location_t loc_a = linemap_position_for_column (line_table, 1);
  location_t loc_b = linemap_position_for_column (line_table, 23);

  /* Two locations on line 2.  */
  linemap_line_start (line_table, 2, 100);
  location_t loc_c = linemap_position_for_column (line_table, 1);
  location_t loc_d = linemap_position_for_column (line_table, 17);

  /* Example of a very long line.  */
  linemap_line_start (line_table, 3, 2000);
  location_t loc_e = linemap_position_for_column (line_table, 700);

  linemap_add (line_table, LC_LEAVE, false, NULL, 0);

  /* Multiple files.  */
  linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
  linemap_line_start (line_table, 1, 200);
  location_t loc_f = linemap_position_for_column (line_table, 150);
  linemap_add (line_table, LC_LEAVE, false, NULL, 0);

  /* Verify that we can recover the location info.  */
  assert_loceq ("foo.c", 1, 1, loc_a);
  assert_loceq ("foo.c", 1, 23, loc_b);
  assert_loceq ("foo.c", 2, 1, loc_c);
  assert_loceq ("foo.c", 2, 17, loc_d);
  assert_loceq ("foo.c", 3, 700, loc_e);
  assert_loceq ("bar.c", 1, 150, loc_f);

  ASSERT_FALSE (is_location_from_builtin_token (loc_a));
  ASSERT_TRUE (pure_location_p (line_table, loc_a));

  /* Verify using make_location to build a range, and extracting data
     back from it.  The caret is loc_c, the range runs from loc_b to
     loc_d.  */
  location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
  ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
  ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
  source_range src_range = get_range_from_loc (line_table, range_c_b_d);
  ASSERT_EQ (loc_b, src_range.m_start);
  ASSERT_EQ (loc_d, src_range.m_finish);
}
1676
1677 /* Verify various properties of UNKNOWN_LOCATION.  */
1678
1679 static void
1680 test_unknown_location ()
1681 {
1682   ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1683   ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1684   ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1685 }
1686
1687 /* Verify various properties of BUILTINS_LOCATION.  */
1688
1689 static void
1690 test_builtins ()
1691 {
1692   assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1693   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1694 }
1695
1696 /* Verify reading of input files (e.g. for caret-based diagnostics).  */
1697
1698 static void
1699 test_reading_source_line ()
1700 {
1701   /* Create a tempfile and write some text to it.  */
1702   temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1703                         "01234567890123456789\n"
1704                         "This is the test text\n"
1705                         "This is the 3rd line\n");
1706
1707   /* Read back a specific line from the tempfile.  */
1708   int line_size;
1709   const char *source_line = location_get_source_line (tmp.get_filename (),
1710                                                       2, &line_size);
1711   ASSERT_TRUE (source_line != NULL);
1712   ASSERT_EQ (21, line_size);
1713   if (!strncmp ("This is the test text",
1714                 source_line, line_size))
1715     ::selftest::pass (SELFTEST_LOCATION,
1716                       "source_line matched expected value");
1717   else
1718     ::selftest::fail (SELFTEST_LOCATION,
1719                       "source_line did not match expected value");
1720
1721 }
1722
1723 /* Tests of lexing.  */
1724
/* Assert that the spelling of token TOK, as produced by
   cpp_token_as_text for PARSER, is the string EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    const char *tok_spelling_                                           \
      = (const char *) cpp_token_as_text ((PARSER), (TOK));             \
    ASSERT_STREQ ((EXPECTED_TEXT), tok_spelling_);                      \
  SELFTEST_END_STMT
1733
1734 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1735    and ranges from EXP_START_COL to EXP_FINISH_COL.
1736    Use LOC as the effective location of the selftest.  */
1737
1738 static void
1739 assert_token_loc_eq (const location &loc,
1740                      const cpp_token *tok,
1741                      const char *exp_filename, int exp_linenum,
1742                      int exp_start_col, int exp_finish_col)
1743 {
1744   location_t tok_loc = tok->src_loc;
1745   ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1746   ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1747
1748   /* If location_t values are sufficiently high, then column numbers
1749      will be unavailable.  */
1750   if (!should_have_column_data_p (tok_loc))
1751     return;
1752
1753   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1754   source_range tok_range = get_range_from_loc (line_table, tok_loc);
1755   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1756   ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1757 }
1758
/* Convenience wrapper around assert_token_loc_eq for checking
   TOK->src_loc; it supplies SELFTEST_LOCATION as the effective
   location of the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,             \
                            EXP_START_COL, EXP_FINISH_COL)              \
  assert_token_loc_eq (SELFTEST_LOCATION,                               \
                       (TOK), (EXP_FILENAME), (EXP_LINENUM),            \
                       (EXP_START_COL), (EXP_FINISH_COL))
1766
1767 /* Test of lexing a file using libcpp, verifying tokens and their
1768    location information.  */
1769
1770 static void
1771 test_lexer (const line_table_case &case_)
1772 {
1773   /* Create a tempfile and write some text to it.  */
1774   const char *content =
1775     /*00000000011111111112222222222333333.3333444444444.455555555556
1776       12345678901234567890123456789012345.6789012345678.901234567890.  */
1777     ("test_name /* c-style comment */\n"
1778      "                                  \"test literal\"\n"
1779      " // test c++-style comment\n"
1780      "   42\n");
1781   temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1782
1783   temp_line_table tmp_lt (case_);
1784
1785   cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1786
1787   const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1788   ASSERT_NE (fname, NULL);
1789
1790   /* Verify that we get the expected tokens back, with the correct
1791      location information.  */
1792
1793   location_t loc;
1794   const cpp_token *tok;
1795   tok = cpp_get_token_with_location (parser, &loc);
1796   ASSERT_NE (tok, NULL);
1797   ASSERT_EQ (tok->type, CPP_NAME);
1798   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
1799   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
1800
1801   tok = cpp_get_token_with_location (parser, &loc);
1802   ASSERT_NE (tok, NULL);
1803   ASSERT_EQ (tok->type, CPP_STRING);
1804   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
1805   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
1806
1807   tok = cpp_get_token_with_location (parser, &loc);
1808   ASSERT_NE (tok, NULL);
1809   ASSERT_EQ (tok->type, CPP_NUMBER);
1810   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
1811   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
1812
1813   tok = cpp_get_token_with_location (parser, &loc);
1814   ASSERT_NE (tok, NULL);
1815   ASSERT_EQ (tok->type, CPP_EOF);
1816
1817   cpp_finish (parser, NULL);
1818   cpp_destroy (parser);
1819 }
1820
/* Forward declarations: the two types below refer to each other.  */

struct lexer_test;
class lexer_test_options;

/* Abstract interface for customizing a lexer_test.
   The lexer_test constructor calls "apply" on the options object it is
   given (if any), passing itself as TEST.  */

class lexer_test_options
{
 public:
  virtual void apply (lexer_test &test) = 0;
};
1834
1835 /* A struct for writing lexer tests.  */
1836
1837 struct lexer_test
1838 {
1839   lexer_test (const line_table_case &case_, const char *content,
1840               lexer_test_options *options);
1841   ~lexer_test ();
1842
1843   const cpp_token *get_token ();
1844
1845   temp_source_file m_tempfile;
1846   temp_line_table m_tmp_lt;
1847   cpp_reader *m_parser;
1848   string_concat_db m_concats;
1849 };
1850
1851 /* Use an EBCDIC encoding for the execution charset, specifically
1852    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
1853
1854    This exercises iconv integration within libcpp.
1855    Not every build of iconv supports the given charset,
1856    so we need to flag this error and handle it gracefully.  */
1857
1858 class ebcdic_execution_charset : public lexer_test_options
1859 {
1860  public:
1861   ebcdic_execution_charset () : m_num_iconv_errors (0)
1862     {
1863       gcc_assert (s_singleton == NULL);
1864       s_singleton = this;
1865     }
1866   ~ebcdic_execution_charset ()
1867     {
1868       gcc_assert (s_singleton == this);
1869       s_singleton = NULL;
1870     }
1871
1872   void apply (lexer_test &test) FINAL OVERRIDE
1873   {
1874     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
1875     cpp_opts->narrow_charset = "IBM1047";
1876
1877     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
1878     callbacks->error = on_error;
1879   }
1880
1881   static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
1882                         int level ATTRIBUTE_UNUSED,
1883                         int reason ATTRIBUTE_UNUSED,
1884                         rich_location *richloc ATTRIBUTE_UNUSED,
1885                         const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
1886     ATTRIBUTE_FPTR_PRINTF(5,0)
1887   {
1888     gcc_assert (s_singleton);
1889     /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
1890        when the local iconv build doesn't support the conversion.  */
1891     if (strstr (msgid, "not supported by iconv"))
1892       {
1893         s_singleton->m_num_iconv_errors++;
1894         return true;
1895       }
1896
1897     /* Otherwise, we have an unexpected error.  */
1898     abort ();
1899   }
1900
1901   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
1902
1903  private:
1904   static ebcdic_execution_charset *s_singleton;
1905   int m_num_iconv_errors;
1906 };
1907
1908 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
1909
1910 /* Constructor.  Override line_table with a new instance based on CASE_,
1911    and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
1912    start parsing the tempfile.  */
1913
1914 lexer_test::lexer_test (const line_table_case &case_, const char *content,
1915                         lexer_test_options *options) :
1916   /* Create a tempfile and write the text to it.  */
1917   m_tempfile (SELFTEST_LOCATION, ".c", content),
1918   m_tmp_lt (case_),
1919   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
1920   m_concats ()
1921 {
1922   if (options)
1923     options->apply (*this);
1924
1925   cpp_init_iconv (m_parser);
1926
1927   /* Parse the file.  */
1928   const char *fname = cpp_read_main_file (m_parser,
1929                                           m_tempfile.get_filename ());
1930   ASSERT_NE (fname, NULL);
1931 }
1932
1933 /* Destructor.  Verify that the next token in m_parser is EOF.  */
1934
1935 lexer_test::~lexer_test ()
1936 {
1937   location_t loc;
1938   const cpp_token *tok;
1939
1940   tok = cpp_get_token_with_location (m_parser, &loc);
1941   ASSERT_NE (tok, NULL);
1942   ASSERT_EQ (tok->type, CPP_EOF);
1943
1944   cpp_finish (m_parser, NULL);
1945   cpp_destroy (m_parser);
1946 }
1947
1948 /* Get the next token from m_parser.  */
1949
1950 const cpp_token *
1951 lexer_test::get_token ()
1952 {
1953   location_t loc;
1954   const cpp_token *tok;
1955
1956   tok = cpp_get_token_with_location (m_parser, &loc);
1957   ASSERT_NE (tok, NULL);
1958   return tok;
1959 }
1960
1961 /* Verify that locations within string literals are correctly handled.  */
1962
1963 /* Verify get_source_range_for_substring for token(s) at STRLOC,
1964    using the string concatenation database for TEST.
1965
1966    Assert that the character at index IDX is on EXPECTED_LINE,
1967    and that it begins at column EXPECTED_START_COL and ends at
1968    EXPECTED_FINISH_COL (unless the locations are beyond
1969    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
1970    columns).  */
1971
1972 static void
1973 assert_char_at_range (const location &loc,
1974                       lexer_test& test,
1975                       location_t strloc, enum cpp_ttype type, int idx,
1976                       int expected_line, int expected_start_col,
1977                       int expected_finish_col)
1978 {
1979   cpp_reader *pfile = test.m_parser;
1980   string_concat_db *concats = &test.m_concats;
1981
1982   source_range actual_range;
1983   const char *err
1984     = get_source_range_for_substring (pfile, concats, strloc, type,
1985                                       idx, idx, &actual_range);
1986   if (should_have_column_data_p (strloc))
1987     ASSERT_EQ_AT (loc, NULL, err);
1988   else
1989     {
1990       ASSERT_STREQ_AT (loc,
1991                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
1992                        err);
1993       return;
1994     }
1995
1996   int actual_start_line = LOCATION_LINE (actual_range.m_start);
1997   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
1998   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
1999   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2000
2001   if (should_have_column_data_p (actual_range.m_start))
2002     {
2003       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2004       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2005     }
2006   if (should_have_column_data_p (actual_range.m_finish))
2007     {
2008       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2009       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2010     }
2011 }
2012
/* Convenience wrapper around assert_char_at_range; it supplies
   SELFTEST_LOCATION as the effective location of any errors.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX,             \
                             EXPECTED_LINE, EXPECTED_START_COL,         \
                             EXPECTED_FINISH_COL)                       \
  assert_char_at_range (SELFTEST_LOCATION,                              \
                        (LEXER_TEST), (STRLOC), (TYPE), (IDX),          \
                        (EXPECTED_LINE), (EXPECTED_START_COL),          \
                        (EXPECTED_FINISH_COL))
2021
2022 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2023    using the string concatenation database for TEST.
2024
2025    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES.  */
2026
2027 static void
2028 assert_num_substring_ranges (const location &loc,
2029                              lexer_test& test,
2030                              location_t strloc,
2031                              enum cpp_ttype type,
2032                              int expected_num_ranges)
2033 {
2034   cpp_reader *pfile = test.m_parser;
2035   string_concat_db *concats = &test.m_concats;
2036
2037   int actual_num_ranges;
2038   const char *err
2039     = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2040                                            &actual_num_ranges);
2041   if (should_have_column_data_p (strloc))
2042     ASSERT_EQ_AT (loc, NULL, err);
2043   else
2044     {
2045       ASSERT_STREQ_AT (loc,
2046                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2047                        err);
2048       return;
2049     }
2050   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2051 }
2052
/* Convenience wrapper around assert_num_substring_ranges; it supplies
   SELFTEST_LOCATION as the effective location of any errors.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,           \
                                    EXPECTED_NUM_RANGES)                \
  assert_num_substring_ranges (SELFTEST_LOCATION,                       \
                               (LEXER_TEST), (STRLOC), (TYPE),          \
                               (EXPECTED_NUM_RANGES))
2060
2061
2062 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2063    returns an error (using the string concatenation database for TEST).  */
2064
2065 static void
2066 assert_has_no_substring_ranges (const location &loc,
2067                                 lexer_test& test,
2068                                 location_t strloc,
2069                                 enum cpp_ttype type,
2070                                 const char *expected_err)
2071 {
2072   cpp_reader *pfile = test.m_parser;
2073   string_concat_db *concats = &test.m_concats;
2074   cpp_substring_ranges ranges;
2075   const char *actual_err
2076     = get_substring_ranges_for_loc (pfile, concats, strloc,
2077                                     type, ranges);
2078   if (should_have_column_data_p (strloc))
2079     ASSERT_STREQ_AT (loc, expected_err, actual_err);
2080   else
2081     ASSERT_STREQ_AT (loc,
2082                      "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2083                      actual_err);
2084 }
2085
/* Convenience wrapper around assert_has_no_substring_ranges; it
   supplies SELFTEST_LOCATION as the location argument.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)   \
  assert_has_no_substring_ranges (SELFTEST_LOCATION,                    \
                                  (LEXER_TEST), (STRLOC), (TYPE), (ERR))
2089
2090 /* Lex a simple string literal.  Verify the substring location data, before
2091    and after running cpp_interpret_string on it.  */
2092
2093 static void
2094 test_lexer_string_locations_simple (const line_table_case &case_)
2095 {
2096   /* Digits 0-9 (with 0 at column 10), the simple way.
2097      ....................000000000.11111111112.2222222223333333333
2098      ....................123456789.01234567890.1234567890123456789
2099      We add a trailing comment to ensure that we correctly locate
2100      the end of the string literal token.  */
2101   const char *content = "        \"0123456789\" /* not a string */\n";
2102   lexer_test test (case_, content, NULL);
2103
2104   /* Verify that we get the expected token back, with the correct
2105      location information.  */
2106   const cpp_token *tok = test.get_token ();
2107   ASSERT_EQ (tok->type, CPP_STRING);
2108   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2109   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2110
2111   /* At this point in lexing, the quote characters are treated as part of
2112      the string (they are stripped off by cpp_interpret_string).  */
2113
2114   ASSERT_EQ (tok->val.str.len, 12);
2115
2116   /* Verify that cpp_interpret_string works.  */
2117   cpp_string dst_string;
2118   const enum cpp_ttype type = CPP_STRING;
2119   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2120                                       &dst_string, type);
2121   ASSERT_TRUE (result);
2122   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2123   free (const_cast <unsigned char *> (dst_string.text));
2124
2125   /* Verify ranges of individual characters.  This no longer includes the
2126      quotes.  */
2127   for (int i = 0; i <= 9; i++)
2128     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2129                           10 + i, 10 + i);
2130
2131   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 10);
2132 }
2133
2134 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2135    encoding.  */
2136
2137 static void
2138 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2139 {
2140   /* EBCDIC support requires iconv.  */
2141   if (!HAVE_ICONV)
2142     return;
2143
2144   /* Digits 0-9 (with 0 at column 10), the simple way.
2145      ....................000000000.11111111112.2222222223333333333
2146      ....................123456789.01234567890.1234567890123456789
2147      We add a trailing comment to ensure that we correctly locate
2148      the end of the string literal token.  */
2149   const char *content = "        \"0123456789\" /* not a string */\n";
2150   ebcdic_execution_charset use_ebcdic;
2151   lexer_test test (case_, content, &use_ebcdic);
2152
2153   /* Verify that we get the expected token back, with the correct
2154      location information.  */
2155   const cpp_token *tok = test.get_token ();
2156   ASSERT_EQ (tok->type, CPP_STRING);
2157   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2158   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2159
2160   /* At this point in lexing, the quote characters are treated as part of
2161      the string (they are stripped off by cpp_interpret_string).  */
2162
2163   ASSERT_EQ (tok->val.str.len, 12);
2164
2165   /* The remainder of the test requires an iconv implementation that
2166      can convert from UTF-8 to the EBCDIC encoding requested above.  */
2167   if (use_ebcdic.iconv_errors_occurred_p ())
2168     return;
2169
2170   /* Verify that cpp_interpret_string works.  */
2171   cpp_string dst_string;
2172   const enum cpp_ttype type = CPP_STRING;
2173   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2174                                       &dst_string, type);
2175   ASSERT_TRUE (result);
2176   /* We should now have EBCDIC-encoded text, specifically
2177      IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2178      The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
2179   ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2180                 (const char *)dst_string.text);
2181   free (const_cast <unsigned char *> (dst_string.text));
2182
2183   /* Verify that we don't attempt to record substring location information
2184      for such cases.  */
2185   ASSERT_HAS_NO_SUBSTRING_RANGES
2186     (test, tok->src_loc, type,
2187      "execution character set != source character set");
2188 }
2189
2190 /* Lex a string literal containing a hex-escaped character.
2191    Verify the substring location data, before and after running
2192    cpp_interpret_string on it.  */
2193
2194 static void
2195 test_lexer_string_locations_hex (const line_table_case &case_)
2196 {
2197   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2198      and with a space in place of digit 6, to terminate the escaped
2199      hex code.
2200      ....................000000000.111111.11112222.
2201      ....................123456789.012345.67890123.  */
2202   const char *content = "        \"01234\\x35 789\"\n";
2203   lexer_test test (case_, content, NULL);
2204
2205   /* Verify that we get the expected token back, with the correct
2206      location information.  */
2207   const cpp_token *tok = test.get_token ();
2208   ASSERT_EQ (tok->type, CPP_STRING);
2209   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2210   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2211
2212   /* At this point in lexing, the quote characters are treated as part of
2213      the string (they are stripped off by cpp_interpret_string).  */
2214   ASSERT_EQ (tok->val.str.len, 15);
2215
2216   /* Verify that cpp_interpret_string works.  */
2217   cpp_string dst_string;
2218   const enum cpp_ttype type = CPP_STRING;
2219   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2220                                       &dst_string, type);
2221   ASSERT_TRUE (result);
2222   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2223   free (const_cast <unsigned char *> (dst_string.text));
2224
2225   /* Verify ranges of individual characters.  This no longer includes the
2226      quotes.  */
2227   for (int i = 0; i <= 4; i++)
2228     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2229   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2230   for (int i = 6; i <= 9; i++)
2231     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2232
2233   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 10);
2234 }
2235
2236 /* Lex a string literal containing an octal-escaped character.
2237    Verify the substring location data after running cpp_interpret_string
2238    on it.  */
2239
2240 static void
2241 test_lexer_string_locations_oct (const line_table_case &case_)
2242 {
2243   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2244      and with a space in place of digit 6, to terminate the escaped
2245      octal code.
2246      ....................000000000.111111.11112222.2222223333333333444
2247      ....................123456789.012345.67890123.4567890123456789012  */
2248   const char *content = "        \"01234\\065 789\" /* not a string */\n";
2249   lexer_test test (case_, content, NULL);
2250
2251   /* Verify that we get the expected token back, with the correct
2252      location information.  */
2253   const cpp_token *tok = test.get_token ();
2254   ASSERT_EQ (tok->type, CPP_STRING);
2255   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2256
2257   /* Verify that cpp_interpret_string works.  */
2258   cpp_string dst_string;
2259   const enum cpp_ttype type = CPP_STRING;
2260   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2261                                       &dst_string, type);
2262   ASSERT_TRUE (result);
2263   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2264   free (const_cast <unsigned char *> (dst_string.text));
2265
2266   /* Verify ranges of individual characters.  This no longer includes the
2267      quotes.  */
2268   for (int i = 0; i < 5; i++)
2269     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2270   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2271   for (int i = 6; i <= 9; i++)
2272     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2273
2274   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 10);
2275 }
2276
2277 /* Test of string literal containing letter escapes.  */
2278
2279 static void
2280 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2281 {
2282   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2283      .....................000000000.1.11111.1.1.11222.22222223333333
2284      .....................123456789.0.12345.6.7.89012.34567890123456.  */
2285   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2286   lexer_test test (case_, content, NULL);
2287
2288   /* Verify that we get the expected tokens back.  */
2289   const cpp_token *tok = test.get_token ();
2290   ASSERT_EQ (tok->type, CPP_STRING);
2291   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2292
2293   /* Verify ranges of individual characters. */
2294   /* "\t".  */
2295   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2296                         0, 1, 10, 11);
2297   /* "foo". */
2298   for (int i = 1; i <= 3; i++)
2299     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2300                           i, 1, 11 + i, 11 + i);
2301   /* "\\" and "\n".  */
2302   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2303                         4, 1, 15, 16);
2304   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2305                         5, 1, 17, 18);
2306
2307   /* "bar".  */
2308   for (int i = 6; i <= 8; i++)
2309     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2310                           i, 1, 13 + i, 13 + i);
2311
2312   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 9);
2313 }
2314
2315 /* Another test of a string literal containing a letter escape.
2316    Based on string seen in
2317      printf ("%-%\n");
2318    in gcc.dg/format/c90-printf-1.c.  */
2319
2320 static void
2321 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2322 {
2323   /* .....................000000000.1111.11.1111.22222222223.
2324      .....................123456789.0123.45.6789.01234567890.  */
2325   const char *content = ("        \"%-%\\n\" /* non-str */\n");
2326   lexer_test test (case_, content, NULL);
2327
2328   /* Verify that we get the expected tokens back.  */
2329   const cpp_token *tok = test.get_token ();
2330   ASSERT_EQ (tok->type, CPP_STRING);
2331   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2332
2333   /* Verify ranges of individual characters. */
2334   /* "%-%".  */
2335   for (int i = 0; i < 3; i++)
2336     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2337                           i, 1, 10 + i, 10 + i);
2338   /* "\n".  */
2339   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2340                         3, 1, 13, 14);
2341
2342   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 4);
2343 }
2344
2345 /* Lex a string literal containing UCN 4 characters.
2346    Verify the substring location data after running cpp_interpret_string
2347    on it.  */
2348
2349 static void
2350 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2351 {
2352   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2353      as UCN 4.
2354      ....................000000000.111111.111122.222222223.33333333344444
2355      ....................123456789.012345.678901.234567890.12345678901234  */
2356   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
2357   lexer_test test (case_, content, NULL);
2358
2359   /* Verify that we get the expected token back, with the correct
2360      location information.  */
2361   const cpp_token *tok = test.get_token ();
2362   ASSERT_EQ (tok->type, CPP_STRING);
2363   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2364
2365   /* Verify that cpp_interpret_string works.
2366      The string should be encoded in the execution character
2367      set.  Assuming that that is UTF-8, we should have the following:
2368      -----------  ----  -----  -------  ----------------
2369      Byte offset  Byte  Octal  Unicode  Source Column(s)
2370      -----------  ----  -----  -------  ----------------
2371      0            0x30         '0'      10
2372      1            0x31         '1'      11
2373      2            0x32         '2'      12
2374      3            0x33         '3'      13
2375      4            0x34         '4'      14
2376      5            0xE2  \342   U+2174   15-20
2377      6            0x85  \205    (cont)  15-20
2378      7            0xB4  \264    (cont)  15-20
2379      8            0xE2  \342   U+2175   21-26
2380      9            0x85  \205    (cont)  21-26
2381      10           0xB5  \265    (cont)  21-26
2382      11           0x37         '7'      27
2383      12           0x38         '8'      28
2384      13           0x39         '9'      29
2385      -----------  ----  -----  -------  ---------------.  */
2386
2387   cpp_string dst_string;
2388   const enum cpp_ttype type = CPP_STRING;
2389   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2390                                       &dst_string, type);
2391   ASSERT_TRUE (result);
2392   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2393                 (const char *)dst_string.text);
2394   free (const_cast <unsigned char *> (dst_string.text));
2395
2396   /* Verify ranges of individual characters.  This no longer includes the
2397      quotes.
2398      '01234'.  */
2399   for (int i = 0; i <= 4; i++)
2400     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2401   /* U+2174.  */
2402   for (int i = 5; i <= 7; i++)
2403     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2404   /* U+2175.  */
2405   for (int i = 8; i <= 10; i++)
2406     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2407   /* '789'.  */
2408   for (int i = 11; i <= 13; i++)
2409     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2410
2411   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 14);
2412 }
2413
2414 /* Lex a string literal containing UCN 8 characters.
2415    Verify the substring location data after running cpp_interpret_string
2416    on it.  */
2417
2418 static void
2419 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2420 {
2421   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2422      ....................000000000.111111.1111222222.2222333333333.344444
2423      ....................123456789.012345.6789012345.6789012345678.901234  */
2424   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
2425   lexer_test test (case_, content, NULL);
2426
2427   /* Verify that we get the expected token back, with the correct
2428      location information.  */
2429   const cpp_token *tok = test.get_token ();
2430   ASSERT_EQ (tok->type, CPP_STRING);
2431   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2432                            "\"01234\\U00002174\\U00002175789\"");
2433
2434   /* Verify that cpp_interpret_string works.
2435      The UTF-8 encoding of the string is identical to that from
2436      the ucn4 testcase above; the only difference is the column
2437      locations.  */
2438   cpp_string dst_string;
2439   const enum cpp_ttype type = CPP_STRING;
2440   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2441                                       &dst_string, type);
2442   ASSERT_TRUE (result);
2443   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2444                 (const char *)dst_string.text);
2445   free (const_cast <unsigned char *> (dst_string.text));
2446
2447   /* Verify ranges of individual characters.  This no longer includes the
2448      quotes.
2449      '01234'.  */
2450   for (int i = 0; i <= 4; i++)
2451     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2452   /* U+2174.  */
2453   for (int i = 5; i <= 7; i++)
2454     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2455   /* U+2175.  */
2456   for (int i = 8; i <= 10; i++)
2457     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2458   /* '789' at columns 35-37  */
2459   for (int i = 11; i <= 13; i++)
2460     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2461
2462   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 14);
2463 }
2464
/* Read the four octets at PTR_BE_VALUE as a big-endian (most significant
   octet first) 32-bit quantity and return it in host byte order.  The
   storage is only ever accessed octet by octet.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *octets = (const unsigned char *) ptr_be_value;
  uint32_t host_value = 0;

  /* Shift in one octet at a time, most significant first.  */
  for (int idx = 0; idx < 4; idx++)
    host_value = (host_value << 8) | (uint32_t) octets[idx];

  return host_value;
}
2476
2477 /* Lex a wide string literal and verify that attempts to read substring
2478    location data from it fail gracefully.  */
2479
2480 static void
2481 test_lexer_string_locations_wide_string (const line_table_case &case_)
2482 {
2483   /* Digits 0-9.
2484      ....................000000000.11111111112.22222222233333
2485      ....................123456789.01234567890.12345678901234  */
2486   const char *content = "       L\"0123456789\" /* non-str */\n";
2487   lexer_test test (case_, content, NULL);
2488
2489   /* Verify that we get the expected token back, with the correct
2490      location information.  */
2491   const cpp_token *tok = test.get_token ();
2492   ASSERT_EQ (tok->type, CPP_WSTRING);
2493   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2494
2495   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
2496   cpp_string dst_string;
2497   const enum cpp_ttype type = CPP_WSTRING;
2498   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2499                                       &dst_string, type);
2500   ASSERT_TRUE (result);
2501   /* The cpp_reader defaults to big-endian with
2502      CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2503      now be encoded as UTF-32BE.  */
2504   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2505   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2506   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2507   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2508   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2509   free (const_cast <unsigned char *> (dst_string.text));
2510
2511   /* We don't yet support generating substring location information
2512      for L"" strings.  */
2513   ASSERT_HAS_NO_SUBSTRING_RANGES
2514     (test, tok->src_loc, type,
2515      "execution character set != source character set");
2516 }
2517
/* Read the two octets at PTR_BE_VALUE as a big-endian (most significant
   octet first) 16-bit quantity and return it in host byte order.  The
   storage is only ever accessed octet by octet.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *octets = (const unsigned char *) ptr_be_value;
  const unsigned int high = octets[0];
  const unsigned int low = octets[1];

  return (uint16_t) ((high << 8) | low);
}
2526
2527 /* Lex a u"" string literal and verify that attempts to read substring
2528    location data from it fail gracefully.  */
2529
2530 static void
2531 test_lexer_string_locations_string16 (const line_table_case &case_)
2532 {
2533   /* Digits 0-9.
2534      ....................000000000.11111111112.22222222233333
2535      ....................123456789.01234567890.12345678901234  */
2536   const char *content = "       u\"0123456789\" /* non-str */\n";
2537   lexer_test test (case_, content, NULL);
2538
2539   /* Verify that we get the expected token back, with the correct
2540      location information.  */
2541   const cpp_token *tok = test.get_token ();
2542   ASSERT_EQ (tok->type, CPP_STRING16);
2543   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2544
2545   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
2546   cpp_string dst_string;
2547   const enum cpp_ttype type = CPP_STRING16;
2548   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2549                                       &dst_string, type);
2550   ASSERT_TRUE (result);
2551
2552   /* The cpp_reader defaults to big-endian, so dst_string should
2553      now be encoded as UTF-16BE.  */
2554   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2555   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2556   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2557   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2558   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2559   free (const_cast <unsigned char *> (dst_string.text));
2560
2561   /* We don't yet support generating substring location information
2562      for L"" strings.  */
2563   ASSERT_HAS_NO_SUBSTRING_RANGES
2564     (test, tok->src_loc, type,
2565      "execution character set != source character set");
2566 }
2567
2568 /* Lex a U"" string literal and verify that attempts to read substring
2569    location data from it fail gracefully.  */
2570
2571 static void
2572 test_lexer_string_locations_string32 (const line_table_case &case_)
2573 {
2574   /* Digits 0-9.
2575      ....................000000000.11111111112.22222222233333
2576      ....................123456789.01234567890.12345678901234  */
2577   const char *content = "       U\"0123456789\" /* non-str */\n";
2578   lexer_test test (case_, content, NULL);
2579
2580   /* Verify that we get the expected token back, with the correct
2581      location information.  */
2582   const cpp_token *tok = test.get_token ();
2583   ASSERT_EQ (tok->type, CPP_STRING32);
2584   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2585
2586   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
2587   cpp_string dst_string;
2588   const enum cpp_ttype type = CPP_STRING32;
2589   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2590                                       &dst_string, type);
2591   ASSERT_TRUE (result);
2592
2593   /* The cpp_reader defaults to big-endian, so dst_string should
2594      now be encoded as UTF-32BE.  */
2595   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2596   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2597   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2598   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2599   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2600   free (const_cast <unsigned char *> (dst_string.text));
2601
2602   /* We don't yet support generating substring location information
2603      for L"" strings.  */
2604   ASSERT_HAS_NO_SUBSTRING_RANGES
2605     (test, tok->src_loc, type,
2606      "execution character set != source character set");
2607 }
2608
2609 /* Lex a u8-string literal.
2610    Verify the substring location data after running cpp_interpret_string
2611    on it.  */
2612
2613 static void
2614 test_lexer_string_locations_u8 (const line_table_case &case_)
2615 {
2616   /* Digits 0-9.
2617      ....................000000000.11111111112.22222222233333
2618      ....................123456789.01234567890.12345678901234  */
2619   const char *content = "      u8\"0123456789\" /* non-str */\n";
2620   lexer_test test (case_, content, NULL);
2621
2622   /* Verify that we get the expected token back, with the correct
2623      location information.  */
2624   const cpp_token *tok = test.get_token ();
2625   ASSERT_EQ (tok->type, CPP_UTF8STRING);
2626   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2627
2628   /* Verify that cpp_interpret_string works.  */
2629   cpp_string dst_string;
2630   const enum cpp_ttype type = CPP_STRING;
2631   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2632                                       &dst_string, type);
2633   ASSERT_TRUE (result);
2634   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2635   free (const_cast <unsigned char *> (dst_string.text));
2636
2637   /* Verify ranges of individual characters.  This no longer includes the
2638      quotes.  */
2639   for (int i = 0; i <= 9; i++)
2640     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2641 }
2642
2643 /* Lex a string literal containing UTF-8 source characters.
2644    Verify the substring location data after running cpp_interpret_string
2645    on it.  */
2646
2647 static void
2648 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2649 {
2650  /* This string literal is written out to the source file as UTF-8,
2651     and is of the form "before mojibake after", where "mojibake"
2652     is written as the following four unicode code points:
2653        U+6587 CJK UNIFIED IDEOGRAPH-6587
2654        U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2655        U+5316 CJK UNIFIED IDEOGRAPH-5316
2656        U+3051 HIRAGANA LETTER KE.
2657      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2658      "before" and "after" are 1 byte per unicode character.
2659
2660      The numbering shown are "columns", which are *byte* numbers within
2661      the line, rather than unicode character numbers.
2662
2663      .................... 000000000.1111111.
2664      .................... 123456789.0123456.  */
2665   const char *content = ("        \"before "
2666                          /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2667                               UTF-8: 0xE6 0x96 0x87
2668                               C octal escaped UTF-8: \346\226\207
2669                             "column" numbers: 17-19.  */
2670                          "\346\226\207"
2671
2672                          /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2673                               UTF-8: 0xE5 0xAD 0x97
2674                               C octal escaped UTF-8: \345\255\227
2675                             "column" numbers: 20-22.  */
2676                          "\345\255\227"
2677
2678                          /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2679                               UTF-8: 0xE5 0x8C 0x96
2680                               C octal escaped UTF-8: \345\214\226
2681                             "column" numbers: 23-25.  */
2682                          "\345\214\226"
2683
2684                          /* U+3051 HIRAGANA LETTER KE
2685                               UTF-8: 0xE3 0x81 0x91
2686                               C octal escaped UTF-8: \343\201\221
2687                             "column" numbers: 26-28.  */
2688                          "\343\201\221"
2689
2690                          /* column numbers 29 onwards
2691                           2333333.33334444444444
2692                           9012345.67890123456789. */
2693                          " after\" /* non-str */\n");
2694   lexer_test test (case_, content, NULL);
2695
2696   /* Verify that we get the expected token back, with the correct
2697      location information.  */
2698   const cpp_token *tok = test.get_token ();
2699   ASSERT_EQ (tok->type, CPP_STRING);
2700   ASSERT_TOKEN_AS_TEXT_EQ
2701     (test.m_parser, tok,
2702      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2703
2704   /* Verify that cpp_interpret_string works.  */
2705   cpp_string dst_string;
2706   const enum cpp_ttype type = CPP_STRING;
2707   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2708                                       &dst_string, type);
2709   ASSERT_TRUE (result);
2710   ASSERT_STREQ
2711     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2712      (const char *)dst_string.text);
2713   free (const_cast <unsigned char *> (dst_string.text));
2714
2715   /* Verify ranges of individual characters.  This no longer includes the
2716      quotes.
2717      Assuming that both source and execution encodings are UTF-8, we have
2718      a run of 25 octets in each.  */
2719   for (int i = 0; i < 25; i++)
2720     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2721
2722   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 25);
2723 }
2724
2725 /* Test of string literal concatenation.  */
2726
2727 static void
2728 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
2729 {
2730   /* Digits 0-9.
2731      .....................000000000.111111.11112222222222
2732      .....................123456789.012345.67890123456789.  */
2733   const char *content = ("        \"01234\" /* non-str */\n"
2734                          "        \"56789\" /* non-str */\n");
2735   lexer_test test (case_, content, NULL);
2736
2737   location_t input_locs[2];
2738
2739   /* Verify that we get the expected tokens back.  */
2740   auto_vec <cpp_string> input_strings;
2741   const cpp_token *tok_a = test.get_token ();
2742   ASSERT_EQ (tok_a->type, CPP_STRING);
2743   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
2744   input_strings.safe_push (tok_a->val.str);
2745   input_locs[0] = tok_a->src_loc;
2746
2747   const cpp_token *tok_b = test.get_token ();
2748   ASSERT_EQ (tok_b->type, CPP_STRING);
2749   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
2750   input_strings.safe_push (tok_b->val.str);
2751   input_locs[1] = tok_b->src_loc;
2752
2753   /* Verify that cpp_interpret_string works.  */
2754   cpp_string dst_string;
2755   const enum cpp_ttype type = CPP_STRING;
2756   bool result = cpp_interpret_string (test.m_parser,
2757                                       input_strings.address (), 2,
2758                                       &dst_string, type);
2759   ASSERT_TRUE (result);
2760   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2761   free (const_cast <unsigned char *> (dst_string.text));
2762
2763   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
2764   test.m_concats.record_string_concatenation (2, input_locs);
2765
2766   location_t initial_loc = input_locs[0];
2767
2768   for (int i = 0; i <= 4; i++)
2769     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
2770   for (int i = 5; i <= 9; i++)
2771     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
2772
2773   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 10);
2774 }
2775
2776 /* Another test of string literal concatenation.  */
2777
2778 static void
2779 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
2780 {
2781   /* Digits 0-9.
2782      .....................000000000.111.11111112222222
2783      .....................123456789.012.34567890123456.  */
2784   const char *content = ("        \"01\" /* non-str */\n"
2785                          "        \"23\" /* non-str */\n"
2786                          "        \"45\" /* non-str */\n"
2787                          "        \"67\" /* non-str */\n"
2788                          "        \"89\" /* non-str */\n");
2789   lexer_test test (case_, content, NULL);
2790
2791   auto_vec <cpp_string> input_strings;
2792   location_t input_locs[5];
2793
2794   /* Verify that we get the expected tokens back.  */
2795   for (int i = 0; i < 5; i++)
2796     {
2797       const cpp_token *tok = test.get_token ();
2798       ASSERT_EQ (tok->type, CPP_STRING);
2799       input_strings.safe_push (tok->val.str);
2800       input_locs[i] = tok->src_loc;
2801     }
2802
2803   /* Verify that cpp_interpret_string works.  */
2804   cpp_string dst_string;
2805   const enum cpp_ttype type = CPP_STRING;
2806   bool result = cpp_interpret_string (test.m_parser,
2807                                       input_strings.address (), 5,
2808                                       &dst_string, type);
2809   ASSERT_TRUE (result);
2810   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2811   free (const_cast <unsigned char *> (dst_string.text));
2812
2813   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
2814   test.m_concats.record_string_concatenation (5, input_locs);
2815
2816   location_t initial_loc = input_locs[0];
2817
2818   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
2819      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
2820      and expect get_source_range_for_substring to fail.
2821      However, for a string concatenation test, we can have a case
2822      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
2823      but subsequent strings can be after it.
2824      Attempting to detect this within assert_char_at_range
2825      would overcomplicate the logic for the common test cases, so
2826      we detect it here.  */
2827   if (should_have_column_data_p (input_locs[0])
2828       && !should_have_column_data_p (input_locs[4]))
2829     {
2830       /* Verify that get_source_range_for_substring gracefully rejects
2831          this case.  */
2832       source_range actual_range;
2833       const char *err
2834         = get_source_range_for_substring (test.m_parser, &test.m_concats,
2835                                           initial_loc, type, 0, 0,
2836                                           &actual_range);
2837       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
2838       return;
2839     }
2840
2841   for (int i = 0; i < 5; i++)
2842     for (int j = 0; j < 2; j++)
2843       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
2844                             i + 1, 10 + j, 10 + j);
2845
2846   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 10);
2847 }
2848
2849 /* Another test of string literal concatenation, this time combined with
2850    various kinds of escaped characters.  */
2851
2852 static void
2853 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
2854 {
2855   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
2856      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
2857   const char *content
2858     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
2859        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
2860     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
2861   lexer_test test (case_, content, NULL);
2862
2863   auto_vec <cpp_string> input_strings;
2864   location_t input_locs[4];
2865
2866   /* Verify that we get the expected tokens back.  */
2867   for (int i = 0; i < 4; i++)
2868     {
2869       const cpp_token *tok = test.get_token ();
2870       ASSERT_EQ (tok->type, CPP_STRING);
2871       input_strings.safe_push (tok->val.str);
2872       input_locs[i] = tok->src_loc;
2873     }
2874
2875   /* Verify that cpp_interpret_string works.  */
2876   cpp_string dst_string;
2877   const enum cpp_ttype type = CPP_STRING;
2878   bool result = cpp_interpret_string (test.m_parser,
2879                                       input_strings.address (), 4,
2880                                       &dst_string, type);
2881   ASSERT_TRUE (result);
2882   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2883   free (const_cast <unsigned char *> (dst_string.text));
2884
2885   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
2886   test.m_concats.record_string_concatenation (4, input_locs);
2887
2888   location_t initial_loc = input_locs[0];
2889
2890   for (int i = 0; i <= 4; i++)
2891     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
2892   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
2893   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
2894   for (int i = 7; i <= 9; i++)
2895     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
2896
2897   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 10);
2898 }
2899
2900 /* Test of string literal in a macro.  */
2901
2902 static void
2903 test_lexer_string_locations_macro (const line_table_case &case_)
2904 {
2905   /* Digits 0-9.
2906      .....................0000000001111111111.22222222223.
2907      .....................1234567890123456789.01234567890.  */
2908   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
2909                          "  MACRO");
2910   lexer_test test (case_, content, NULL);
2911
2912   /* Verify that we get the expected tokens back.  */
2913   const cpp_token *tok = test.get_token ();
2914   ASSERT_EQ (tok->type, CPP_PADDING);
2915
2916   tok = test.get_token ();
2917   ASSERT_EQ (tok->type, CPP_STRING);
2918   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2919
2920   /* Verify ranges of individual characters.  We ought to
2921      see columns within the macro definition.  */
2922   for (int i = 0; i <= 9; i++)
2923     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2924                           i, 1, 20 + i, 20 + i);
2925
2926   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2927
2928   tok = test.get_token ();
2929   ASSERT_EQ (tok->type, CPP_PADDING);
2930 }
2931
2932 /* Test of stringification of a macro argument.  */
2933
2934 static void
2935 test_lexer_string_locations_stringified_macro_argument
2936   (const line_table_case &case_)
2937 {
2938   /* .....................000000000111111111122222222223.
2939      .....................123456789012345678901234567890.  */
2940   const char *content = ("#define MACRO(X) #X /* non-str */\n"
2941                          "MACRO(foo)\n");
2942   lexer_test test (case_, content, NULL);
2943
2944   /* Verify that we get the expected token back.  */
2945   const cpp_token *tok = test.get_token ();
2946   ASSERT_EQ (tok->type, CPP_PADDING);
2947
2948   tok = test.get_token ();
2949   ASSERT_EQ (tok->type, CPP_STRING);
2950   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
2951
2952   /* We don't support getting the location of a stringified macro
2953      argument.  Verify that it fails gracefully.  */
2954   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
2955                                   "cpp_interpret_string_1 failed");
2956
2957   tok = test.get_token ();
2958   ASSERT_EQ (tok->type, CPP_PADDING);
2959
2960   tok = test.get_token ();
2961   ASSERT_EQ (tok->type, CPP_PADDING);
2962 }
2963
2964 /* Ensure that we are fail gracefully if something attempts to pass
2965    in a location that isn't a string literal token.  Seen on this code:
2966
2967      const char a[] = " %d ";
2968      __builtin_printf (a, 0.5);
2969                        ^
2970
2971    when c-format.c erroneously used the indicated one-character
2972    location as the format string location, leading to a read past the
2973    end of a string buffer in cpp_interpret_string_1.  */
2974
2975 static void
2976 test_lexer_string_locations_non_string (const line_table_case &case_)
2977 {
2978   /* .....................000000000111111111122222222223.
2979      .....................123456789012345678901234567890.  */
2980   const char *content = ("         a\n");
2981   lexer_test test (case_, content, NULL);
2982
2983   /* Verify that we get the expected token back.  */
2984   const cpp_token *tok = test.get_token ();
2985   ASSERT_EQ (tok->type, CPP_NAME);
2986   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
2987
2988   /* At this point, libcpp is attempting to interpret the name as a
2989      string literal, despite it not starting with a quote.  We don't detect
2990      that, but we should at least fail gracefully.  */
2991   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
2992                                   "cpp_interpret_string_1 failed");
2993 }
2994
2995 /* Ensure that we can read substring information for a token which
2996    starts in one linemap and ends in another .  Adapted from
2997    gcc.dg/cpp/pr69985.c.  */
2998
2999 static void
3000 test_lexer_string_locations_long_line (const line_table_case &case_)
3001 {
3002   /* .....................000000.000111111111
3003      .....................123456.789012346789.  */
3004   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
3005                          "     \"0123456789012345678901234567890123456789"
3006                          "0123456789012345678901234567890123456789"
3007                          "0123456789012345678901234567890123456789"
3008                          "0123456789\"\n");
3009
3010   lexer_test test (case_, content, NULL);
3011
3012   /* Verify that we get the expected token back.  */
3013   const cpp_token *tok = test.get_token ();
3014   ASSERT_EQ (tok->type, CPP_STRING);
3015
3016   if (!should_have_column_data_p (line_table->highest_location))
3017     return;
3018
3019   /* Verify ranges of individual characters.  */
3020   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 130);
3021   for (int i = 0; i < 130; i++)
3022     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3023                           i, 2, 7 + i, 7 + i);
3024 }
3025
3026 /* Test of lexing char constants.  */
3027
3028 static void
3029 test_lexer_char_constants (const line_table_case &case_)
3030 {
3031   /* Various char constants.
3032      .....................0000000001111111111.22222222223.
3033      .....................1234567890123456789.01234567890.  */
3034   const char *content = ("         'a'\n"
3035                          "        u'a'\n"
3036                          "        U'a'\n"
3037                          "        L'a'\n"
3038                          "         'abc'\n");
3039   lexer_test test (case_, content, NULL);
3040
3041   /* Verify that we get the expected tokens back.  */
3042   /* 'a'.  */
3043   const cpp_token *tok = test.get_token ();
3044   ASSERT_EQ (tok->type, CPP_CHAR);
3045   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3046
3047   unsigned int chars_seen;
3048   int unsignedp;
3049   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3050                                           &chars_seen, &unsignedp);
3051   ASSERT_EQ (cc, 'a');
3052   ASSERT_EQ (chars_seen, 1);
3053
3054   /* u'a'.  */
3055   tok = test.get_token ();
3056   ASSERT_EQ (tok->type, CPP_CHAR16);
3057   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3058
3059   /* U'a'.  */
3060   tok = test.get_token ();
3061   ASSERT_EQ (tok->type, CPP_CHAR32);
3062   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3063
3064   /* L'a'.  */
3065   tok = test.get_token ();
3066   ASSERT_EQ (tok->type, CPP_WCHAR);
3067   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3068
3069   /* 'abc' (c-char-sequence).  */
3070   tok = test.get_token ();
3071   ASSERT_EQ (tok->type, CPP_CHAR);
3072   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3073 }
3074 /* A table of interesting location_t values, giving one axis of our test
3075    matrix.  */
3076
3077 static const location_t boundary_locations[] = {
3078   /* Zero means "don't override the default values for a new line_table".  */
3079   0,
3080
3081   /* An arbitrary non-zero value that isn't close to one of
3082      the boundary values below.  */
3083   0x10000,
3084
3085   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
3086   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3087   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3088   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3089   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3090   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3091
3092   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
3093   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3094   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3095   LINE_MAP_MAX_LOCATION_WITH_COLS,
3096   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3097   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3098 };
3099
3100 /* Run all of the selftests within this file.  */
3101
3102 void
3103 input_c_tests ()
3104 {
3105   test_should_have_column_data_p ();
3106   test_unknown_location ();
3107   test_builtins ();
3108
3109   /* As noted above in the description of struct line_table_case,
3110      we want to explore a test matrix of interesting line_table
3111      situations, running various selftests for each case within the
3112      matrix.  */
3113
3114   /* Run all tests with:
3115      (a) line_table->default_range_bits == 0, and
3116      (b) line_table->default_range_bits == 5.  */
3117   int num_cases_tested = 0;
3118   for (int default_range_bits = 0; default_range_bits <= 5;
3119        default_range_bits += 5)
3120     {
3121       /* ...and use each of the "interesting" location values as
3122          the starting location within line_table.  */
3123       const int num_boundary_locations
3124         = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3125       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3126         {
3127           line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3128
3129           /* Run all tests for the given case within the test matrix.  */
3130           test_accessing_ordinary_linemaps (c);
3131           test_lexer (c);
3132           test_lexer_string_locations_simple (c);
3133           test_lexer_string_locations_ebcdic (c);
3134           test_lexer_string_locations_hex (c);
3135           test_lexer_string_locations_oct (c);
3136           test_lexer_string_locations_letter_escape_1 (c);
3137           test_lexer_string_locations_letter_escape_2 (c);
3138           test_lexer_string_locations_ucn4 (c);
3139           test_lexer_string_locations_ucn8 (c);
3140           test_lexer_string_locations_wide_string (c);
3141           test_lexer_string_locations_string16 (c);
3142           test_lexer_string_locations_string32 (c);
3143           test_lexer_string_locations_u8 (c);
3144           test_lexer_string_locations_utf8_source (c);
3145           test_lexer_string_locations_concatenation_1 (c);
3146           test_lexer_string_locations_concatenation_2 (c);
3147           test_lexer_string_locations_concatenation_3 (c);
3148           test_lexer_string_locations_macro (c);
3149           test_lexer_string_locations_stringified_macro_argument (c);
3150           test_lexer_string_locations_non_string (c);
3151           test_lexer_string_locations_long_line (c);
3152           test_lexer_char_constants (c);
3153
3154           num_cases_tested++;
3155         }
3156     }
3157
3158   /* Verify that we fully covered the test matrix.  */
3159   ASSERT_EQ (num_cases_tested, 2 * 12);
3160
3161   test_reading_source_line ();
3162 }
3163
3164 } // namespace selftest
3165
3166 #endif /* CHECKING_P */