[testsuite] Fix FAIL: gcc.dg/lto/pr69188 on bare-metal targets
[official-gcc.git] / gcc / input.c
blob3e67314932a1faf08a7db2fb59fc119bb2a913f2
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2017 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic-core.h"
25 #include "selftest.h"
26 #include "cpplib.h"
28 #ifndef HAVE_ICONV
29 #define HAVE_ICONV 0
30 #endif
32 /* This is a cache used by get_next_line to store the content of a
33 file to be searched for file lines. */
34 struct fcache
36 /* These are information used to store a line boundary. */
37 struct line_info
39 /* The line number. It starts from 1. */
40 size_t line_num;
42 /* The position (byte count) of the beginning of the line,
43 relative to the file data pointer. This starts at zero. */
44 size_t start_pos;
46 /* The position (byte count) of the last byte of the line. This
47 normally points to the '\n' character, or to one byte after the
48 last byte of the file, if the file doesn't contain a '\n'
49 character. */
50 size_t end_pos;
52 line_info (size_t l, size_t s, size_t e)
53 : line_num (l), start_pos (s), end_pos (e)
56 line_info ()
57 :line_num (0), start_pos (0), end_pos (0)
61 /* The number of time this file has been accessed. This is used
62 to designate which file cache to evict from the cache
63 array. */
64 unsigned use_count;
66 /* The file_path is the key for identifying a particular file in
67 the cache.
68 For libcpp-using code, the underlying buffer for this field is
69 owned by the corresponding _cpp_file within the cpp_reader. */
70 const char *file_path;
72 FILE *fp;
74 /* This points to the content of the file that we've read so
75 far. */
76 char *data;
78 /* The size of the DATA array above.*/
79 size_t size;
81 /* The number of bytes read from the underlying file so far. This
82 must be less (or equal) than SIZE above. */
83 size_t nb_read;
85 /* The index of the beginning of the current line. */
86 size_t line_start_idx;
88 /* The number of the previous line read. This starts at 1. Zero
89 means we've read no line so far. */
90 size_t line_num;
92 /* This is the total number of lines of the current file. At the
93 moment, we try to get this information from the line map
94 subsystem. Note that this is just a hint. When using the C++
95 front-end, this hint is correct because the input file is then
96 completely tokenized before parsing starts; so the line map knows
97 the number of lines before compilation really starts. For e.g,
98 the C front-end, it can happen that we start emitting diagnostics
99 before the line map has seen the end of the file. */
100 size_t total_lines;
102 /* Could this file be missing a trailing newline on its final line?
103 Initially true (to cope with empty files), set to true/false
104 as each line is read. */
105 bool missing_trailing_newline;
107 /* This is a record of the beginning and end of the lines we've seen
108 while reading the file. This is useful to avoid walking the data
109 from the beginning when we are asked to read a line that is
110 before LINE_START_IDX above. Note that the maximum size of this
111 record is fcache_line_record_size, so that the memory consumption
112 doesn't explode. We thus scale total_lines down to
113 fcache_line_record_size. */
114 vec<line_info, va_heap> line_record;
116 fcache ();
117 ~fcache ();
120 /* Current position in real source file. */
122 location_t input_location = UNKNOWN_LOCATION;
124 struct line_maps *line_table;
126 /* A stashed copy of "line_table" for use by selftest::line_table_test.
127 This needs to be a global so that it can be a GC root, and thus
128 prevent the stashed copy from being garbage-collected if the GC runs
129 during a line_table_test. */
131 struct line_maps *saved_line_table;
133 static fcache *fcache_tab;
134 static const size_t fcache_tab_size = 16;
135 static const size_t fcache_buffer_size = 4 * 1024;
136 static const size_t fcache_line_record_size = 100;
138 /* Expand the source location LOC into a human readable location. If
139 LOC resolves to a builtin location, the file name of the readable
140 location is set to the string "<built-in>". If EXPANSION_POINT_P is
141 TRUE and LOC is virtual, then it is resolved to the expansion
142 point of the involved macro. Otherwise, it is resolved to the
143 spelling location of the token.
145 When resolving to the spelling location of the token, if the
146 resulting location is for a built-in location (that is, it has no
147 associated line/column) in the context of a macro expansion, the
148 returned location is the first one (while unwinding the macro
149 location towards its expansion point) that is in real source
150 code. */
152 static expanded_location
153 expand_location_1 (source_location loc,
154 bool expansion_point_p)
156 expanded_location xloc;
157 const line_map_ordinary *map;
158 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
159 tree block = NULL;
161 if (IS_ADHOC_LOC (loc))
163 block = LOCATION_BLOCK (loc);
164 loc = LOCATION_LOCUS (loc);
167 memset (&xloc, 0, sizeof (xloc));
169 if (loc >= RESERVED_LOCATION_COUNT)
171 if (!expansion_point_p)
173 /* We want to resolve LOC to its spelling location.
175 But if that spelling location is a reserved location that
176 appears in the context of a macro expansion (like for a
177 location for a built-in token), let's consider the first
178 location (toward the expansion point) that is not reserved;
179 that is, the first location that is in real source code. */
180 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
181 loc, NULL);
182 lrk = LRK_SPELLING_LOCATION;
184 loc = linemap_resolve_location (line_table, loc,
185 lrk, &map);
186 xloc = linemap_expand_location (line_table, map, loc);
189 xloc.data = block;
190 if (loc <= BUILTINS_LOCATION)
191 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
193 return xloc;
196 /* Initialize the set of cache used for files accessed by caret
197 diagnostic. */
199 static void
200 diagnostic_file_cache_init (void)
202 if (fcache_tab == NULL)
203 fcache_tab = new fcache[fcache_tab_size];
206 /* Free the resources used by the set of cache used for files accessed
207 by caret diagnostic. */
209 void
210 diagnostic_file_cache_fini (void)
212 if (fcache_tab)
214 delete [] (fcache_tab);
215 fcache_tab = NULL;
219 /* Return the total lines number that have been read so far by the
220 line map (in the preprocessor) so far. For languages like C++ that
221 entirely preprocess the input file before starting to parse, this
222 equals the actual number of lines of the file. */
224 static size_t
225 total_lines_num (const char *file_path)
227 size_t r = 0;
228 source_location l = 0;
229 if (linemap_get_file_highest_location (line_table, file_path, &l))
231 gcc_assert (l >= RESERVED_LOCATION_COUNT);
232 expanded_location xloc = expand_location (l);
233 r = xloc.line;
235 return r;
238 /* Lookup the cache used for the content of a given file accessed by
239 caret diagnostic. Return the found cached file, or NULL if no
240 cached file was found. */
242 static fcache*
243 lookup_file_in_cache_tab (const char *file_path)
245 if (file_path == NULL)
246 return NULL;
248 diagnostic_file_cache_init ();
250 /* This will contain the found cached file. */
251 fcache *r = NULL;
252 for (unsigned i = 0; i < fcache_tab_size; ++i)
254 fcache *c = &fcache_tab[i];
255 if (c->file_path && !strcmp (c->file_path, file_path))
257 ++c->use_count;
258 r = c;
262 if (r)
263 ++r->use_count;
265 return r;
268 /* Purge any mention of FILENAME from the cache of files used for
269 printing source code. For use in selftests when working
270 with tempfiles. */
272 void
273 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
275 gcc_assert (file_path);
277 fcache *r = lookup_file_in_cache_tab (file_path);
278 if (!r)
279 /* Not found. */
280 return;
282 r->file_path = NULL;
283 if (r->fp)
284 fclose (r->fp);
285 r->fp = NULL;
286 r->nb_read = 0;
287 r->line_start_idx = 0;
288 r->line_num = 0;
289 r->line_record.truncate (0);
290 r->use_count = 0;
291 r->total_lines = 0;
292 r->missing_trailing_newline = true;
295 /* Return the file cache that has been less used, recently, or the
296 first empty one. If HIGHEST_USE_COUNT is non-null,
297 *HIGHEST_USE_COUNT is set to the highest use count of the entries
298 in the cache table. */
300 static fcache*
301 evicted_cache_tab_entry (unsigned *highest_use_count)
303 diagnostic_file_cache_init ();
305 fcache *to_evict = &fcache_tab[0];
306 unsigned huc = to_evict->use_count;
307 for (unsigned i = 1; i < fcache_tab_size; ++i)
309 fcache *c = &fcache_tab[i];
310 bool c_is_empty = (c->file_path == NULL);
312 if (c->use_count < to_evict->use_count
313 || (to_evict->file_path && c_is_empty))
314 /* We evict C because it's either an entry with a lower use
315 count or one that is empty. */
316 to_evict = c;
318 if (huc < c->use_count)
319 huc = c->use_count;
321 if (c_is_empty)
322 /* We've reached the end of the cache; subsequent elements are
323 all empty. */
324 break;
327 if (highest_use_count)
328 *highest_use_count = huc;
330 return to_evict;
333 /* Create the cache used for the content of a given file to be
334 accessed by caret diagnostic. This cache is added to an array of
335 cache and can be retrieved by lookup_file_in_cache_tab. This
336 function returns the created cache. Note that only the last
337 fcache_tab_size files are cached. */
339 static fcache*
340 add_file_to_cache_tab (const char *file_path)
343 FILE *fp = fopen (file_path, "r");
344 if (fp == NULL)
345 return NULL;
347 unsigned highest_use_count = 0;
348 fcache *r = evicted_cache_tab_entry (&highest_use_count);
349 r->file_path = file_path;
350 if (r->fp)
351 fclose (r->fp);
352 r->fp = fp;
353 r->nb_read = 0;
354 r->line_start_idx = 0;
355 r->line_num = 0;
356 r->line_record.truncate (0);
357 /* Ensure that this cache entry doesn't get evicted next time
358 add_file_to_cache_tab is called. */
359 r->use_count = ++highest_use_count;
360 r->total_lines = total_lines_num (file_path);
361 r->missing_trailing_newline = true;
363 return r;
366 /* Lookup the cache used for the content of a given file accessed by
367 caret diagnostic. If no cached file was found, create a new cache
368 for this file, add it to the array of cached file and return
369 it. */
371 static fcache*
372 lookup_or_add_file_to_cache_tab (const char *file_path)
374 fcache *r = lookup_file_in_cache_tab (file_path);
375 if (r == NULL)
376 r = add_file_to_cache_tab (file_path);
377 return r;
380 /* Default constructor for a cache of file used by caret
381 diagnostic. */
383 fcache::fcache ()
384 : use_count (0), file_path (NULL), fp (NULL), data (0),
385 size (0), nb_read (0), line_start_idx (0), line_num (0),
386 total_lines (0), missing_trailing_newline (true)
388 line_record.create (0);
391 /* Destructor for a cache of file used by caret diagnostic. */
393 fcache::~fcache ()
395 if (fp)
397 fclose (fp);
398 fp = NULL;
400 if (data)
402 XDELETEVEC (data);
403 data = 0;
405 line_record.release ();
408 /* Returns TRUE iff the cache would need to be filled with data coming
409 from the file. That is, either the cache is empty or full or the
410 current line is empty. Note that if the cache is full, it would
411 need to be extended and filled again. */
413 static bool
414 needs_read (fcache *c)
416 return (c->nb_read == 0
417 || c->nb_read == c->size
418 || (c->line_start_idx >= c->nb_read - 1));
421 /* Return TRUE iff the cache is full and thus needs to be
422 extended. */
424 static bool
425 needs_grow (fcache *c)
427 return c->nb_read == c->size;
430 /* Grow the cache if it needs to be extended. */
432 static void
433 maybe_grow (fcache *c)
435 if (!needs_grow (c))
436 return;
438 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
439 c->data = XRESIZEVEC (char, c->data, size);
440 c->size = size;
443 /* Read more data into the cache. Extends the cache if need be.
444 Returns TRUE iff new data could be read. */
446 static bool
447 read_data (fcache *c)
449 if (feof (c->fp) || ferror (c->fp))
450 return false;
452 maybe_grow (c);
454 char * from = c->data + c->nb_read;
455 size_t to_read = c->size - c->nb_read;
456 size_t nb_read = fread (from, 1, to_read, c->fp);
458 if (ferror (c->fp))
459 return false;
461 c->nb_read += nb_read;
462 return !!nb_read;
465 /* Read new data iff the cache needs to be filled with more data
466 coming from the file FP. Return TRUE iff the cache was filled with
467 mode data. */
469 static bool
470 maybe_read_data (fcache *c)
472 if (!needs_read (c))
473 return false;
474 return read_data (c);
477 /* Read a new line from file FP, using C as a cache for the data
478 coming from the file. Upon successful completion, *LINE is set to
479 the beginning of the line found. *LINE points directly in the
480 line cache and is only valid until the next call of get_next_line.
481 *LINE_LEN is set to the length of the line. Note that the line
482 does not contain any terminal delimiter. This function returns
483 true if some data was read or process from the cache, false
484 otherwise. Note that subsequent calls to get_next_line might
485 make the content of *LINE invalid. */
487 static bool
488 get_next_line (fcache *c, char **line, ssize_t *line_len)
490 /* Fill the cache with data to process. */
491 maybe_read_data (c);
493 size_t remaining_size = c->nb_read - c->line_start_idx;
494 if (remaining_size == 0)
495 /* There is no more data to process. */
496 return false;
498 char *line_start = c->data + c->line_start_idx;
500 char *next_line_start = NULL;
501 size_t len = 0;
502 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
503 if (line_end == NULL)
505 /* We haven't found the end-of-line delimiter in the cache.
506 Fill the cache with more data from the file and look for the
507 '\n'. */
508 while (maybe_read_data (c))
510 line_start = c->data + c->line_start_idx;
511 remaining_size = c->nb_read - c->line_start_idx;
512 line_end = (char *) memchr (line_start, '\n', remaining_size);
513 if (line_end != NULL)
515 next_line_start = line_end + 1;
516 break;
519 if (line_end == NULL)
521 /* We've loadded all the file into the cache and still no
522 '\n'. Let's say the line ends up at one byte passed the
523 end of the file. This is to stay consistent with the case
524 of when the line ends up with a '\n' and line_end points to
525 that terminal '\n'. That consistency is useful below in
526 the len calculation. */
527 line_end = c->data + c->nb_read ;
528 c->missing_trailing_newline = true;
530 else
531 c->missing_trailing_newline = false;
533 else
535 next_line_start = line_end + 1;
536 c->missing_trailing_newline = false;
539 if (ferror (c->fp))
540 return false;
542 /* At this point, we've found the end of the of line. It either
543 points to the '\n' or to one byte after the last byte of the
544 file. */
545 gcc_assert (line_end != NULL);
547 len = line_end - line_start;
549 if (c->line_start_idx < c->nb_read)
550 *line = line_start;
552 ++c->line_num;
554 /* Before we update our line record, make sure the hint about the
555 total number of lines of the file is correct. If it's not, then
556 we give up recording line boundaries from now on. */
557 bool update_line_record = true;
558 if (c->line_num > c->total_lines)
559 update_line_record = false;
561 /* Now update our line record so that re-reading lines from the
562 before c->line_start_idx is faster. */
563 if (update_line_record
564 && c->line_record.length () < fcache_line_record_size)
566 /* If the file lines fits in the line record, we just record all
567 its lines ...*/
568 if (c->total_lines <= fcache_line_record_size
569 && c->line_num > c->line_record.length ())
570 c->line_record.safe_push (fcache::line_info (c->line_num,
571 c->line_start_idx,
572 line_end - c->data));
573 else if (c->total_lines > fcache_line_record_size)
575 /* ... otherwise, we just scale total_lines down to
576 (fcache_line_record_size lines. */
577 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
578 if (c->line_record.length () == 0
579 || n >= c->line_record.length ())
580 c->line_record.safe_push (fcache::line_info (c->line_num,
581 c->line_start_idx,
582 line_end - c->data));
586 /* Update c->line_start_idx so that it points to the next line to be
587 read. */
588 if (next_line_start)
589 c->line_start_idx = next_line_start - c->data;
590 else
591 /* We didn't find any terminal '\n'. Let's consider that the end
592 of line is the end of the data in the cache. The next
593 invocation of get_next_line will either read more data from the
594 underlying file or return false early because we've reached the
595 end of the file. */
596 c->line_start_idx = c->nb_read;
598 *line_len = len;
600 return true;
603 /* Consume the next bytes coming from the cache (or from its
604 underlying file if there are remaining unread bytes in the file)
605 until we reach the next end-of-line (or end-of-file). There is no
606 copying from the cache involved. Return TRUE upon successful
607 completion. */
609 static bool
610 goto_next_line (fcache *cache)
612 char *l;
613 ssize_t len;
615 return get_next_line (cache, &l, &len);
618 /* Read an arbitrary line number LINE_NUM from the file cached in C.
619 If the line was read successfully, *LINE points to the beginning
620 of the line in the file cache and *LINE_LEN is the length of the
621 line. *LINE is not nul-terminated, but may contain zero bytes.
622 *LINE is only valid until the next call of read_line_num.
623 This function returns bool if a line was read. */
625 static bool
626 read_line_num (fcache *c, size_t line_num,
627 char **line, ssize_t *line_len)
629 gcc_assert (line_num > 0);
631 if (line_num <= c->line_num)
633 /* We've been asked to read lines that are before c->line_num.
634 So lets use our line record (if it's not empty) to try to
635 avoid re-reading the file from the beginning again. */
637 if (c->line_record.is_empty ())
639 c->line_start_idx = 0;
640 c->line_num = 0;
642 else
644 fcache::line_info *i = NULL;
645 if (c->total_lines <= fcache_line_record_size)
647 /* In languages where the input file is not totally
648 preprocessed up front, the c->total_lines hint
649 can be smaller than the number of lines of the
650 file. In that case, only the first
651 c->total_lines have been recorded.
653 Otherwise, the first c->total_lines we've read have
654 their start/end recorded here. */
655 i = (line_num <= c->total_lines)
656 ? &c->line_record[line_num - 1]
657 : &c->line_record[c->total_lines - 1];
658 gcc_assert (i->line_num <= line_num);
660 else
662 /* So the file had more lines than our line record
663 size. Thus the number of lines we've recorded has
664 been scaled down to fcache_line_reacord_size. Let's
665 pick the start/end of the recorded line that is
666 closest to line_num. */
667 size_t n = (line_num <= c->total_lines)
668 ? line_num * fcache_line_record_size / c->total_lines
669 : c ->line_record.length () - 1;
670 if (n < c->line_record.length ())
672 i = &c->line_record[n];
673 gcc_assert (i->line_num <= line_num);
677 if (i && i->line_num == line_num)
679 /* We have the start/end of the line. */
680 *line = c->data + i->start_pos;
681 *line_len = i->end_pos - i->start_pos;
682 return true;
685 if (i)
687 c->line_start_idx = i->start_pos;
688 c->line_num = i->line_num - 1;
690 else
692 c->line_start_idx = 0;
693 c->line_num = 0;
698 /* Let's walk from line c->line_num up to line_num - 1, without
699 copying any line. */
700 while (c->line_num < line_num - 1)
701 if (!goto_next_line (c))
702 return false;
704 /* The line we want is the next one. Let's read and copy it back to
705 the caller. */
706 return get_next_line (c, line, line_len);
709 /* Return the physical source line that corresponds to FILE_PATH/LINE.
710 The line is not nul-terminated. The returned pointer is only
711 valid until the next call of location_get_source_line.
712 Note that the line can contain several null characters,
713 so LINE_LEN, if non-null, points to the actual length of the line.
714 If the function fails, NULL is returned. */
716 const char *
717 location_get_source_line (const char *file_path, int line,
718 int *line_len)
720 char *buffer = NULL;
721 ssize_t len;
723 if (line == 0)
724 return NULL;
726 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
727 if (c == NULL)
728 return NULL;
730 bool read = read_line_num (c, line, &buffer, &len);
732 if (read && line_len)
733 *line_len = len;
735 return read ? buffer : NULL;
738 /* Determine if FILE_PATH missing a trailing newline on its final line.
739 Only valid to call once all of the file has been loaded, by
740 requesting a line number beyond the end of the file. */
742 bool
743 location_missing_trailing_newline (const char *file_path)
745 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
746 if (c == NULL)
747 return false;
749 return c->missing_trailing_newline;
752 /* Test if the location originates from the spelling location of a
753 builtin-tokens. That is, return TRUE if LOC is a (possibly
754 virtual) location of a built-in token that appears in the expansion
755 list of a macro. Please note that this function also works on
756 tokens that result from built-in tokens. For instance, the
757 function would return true if passed a token "4" that is the result
758 of the expansion of the built-in __LINE__ macro. */
759 bool
760 is_location_from_builtin_token (source_location loc)
762 const line_map_ordinary *map = NULL;
763 loc = linemap_resolve_location (line_table, loc,
764 LRK_SPELLING_LOCATION, &map);
765 return loc == BUILTINS_LOCATION;
768 /* Expand the source location LOC into a human readable location. If
769 LOC is virtual, it resolves to the expansion point of the involved
770 macro. If LOC resolves to a builtin location, the file name of the
771 readable location is set to the string "<built-in>". */
773 expanded_location
774 expand_location (source_location loc)
776 return expand_location_1 (loc, /*expansion_point_p=*/true);
779 /* Expand the source location LOC into a human readable location. If
780 LOC is virtual, it resolves to the expansion location of the
781 relevant macro. If LOC resolves to a builtin location, the file
782 name of the readable location is set to the string
783 "<built-in>". */
785 expanded_location
786 expand_location_to_spelling_point (source_location loc)
788 return expand_location_1 (loc, /*expansion_point_p=*/false);
791 /* The rich_location class within libcpp requires a way to expand
792 source_location instances, and relies on the client code
793 providing a symbol named
794 linemap_client_expand_location_to_spelling_point
795 to do this.
797 This is the implementation for libcommon.a (all host binaries),
798 which simply calls into expand_location_to_spelling_point. */
800 expanded_location
801 linemap_client_expand_location_to_spelling_point (source_location loc)
803 return expand_location_to_spelling_point (loc);
807 /* If LOCATION is in a system header and if it is a virtual location for
808 a token coming from the expansion of a macro, unwind it to the
809 location of the expansion point of the macro. Otherwise, just return
810 LOCATION.
812 This is used for instance when we want to emit diagnostics about a
813 token that may be located in a macro that is itself defined in a
814 system header, for example, for the NULL macro. In such a case, if
815 LOCATION were passed directly to diagnostic functions such as
816 warning_at, the diagnostic would be suppressed (unless
817 -Wsystem-headers). */
819 source_location
820 expansion_point_location_if_in_system_header (source_location location)
822 if (in_system_header_at (location))
823 location = linemap_resolve_location (line_table, location,
824 LRK_MACRO_EXPANSION_POINT,
825 NULL);
826 return location;
829 /* If LOCATION is a virtual location for a token coming from the expansion
830 of a macro, unwind to the location of the expansion point of the macro. */
832 source_location
833 expansion_point_location (source_location location)
835 return linemap_resolve_location (line_table, location,
836 LRK_MACRO_EXPANSION_POINT, NULL);
839 /* Construct a location with caret at CARET, ranging from START to
840 finish e.g.
842 11111111112
843 12345678901234567890
845 523 return foo + bar;
846 ~~~~^~~~~
849 The location's caret is at the "+", line 523 column 15, but starts
850 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
851 of "bar" at column 19. */
853 location_t
854 make_location (location_t caret, location_t start, location_t finish)
856 location_t pure_loc = get_pure_location (caret);
857 source_range src_range;
858 src_range.m_start = get_start (start);
859 src_range.m_finish = get_finish (finish);
860 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
861 pure_loc,
862 src_range,
863 NULL);
864 return combined_loc;
#define ONE_K 1024
#define ONE_M (ONE_K * ONE_K)

/* Scale the number X for display:
   - below 10 K (in base 2), X is left as is;
   - from 10 K up to, but not including, 10 M, X is divided by 1024;
   - from 10 M upwards, X is divided by 1024 * 1024.
   The result has type unsigned long.  X is evaluated more than once,
   so it must not have side effects.  */
#define SCALE(x) ((unsigned long) ((x) < 10 * ONE_K \
                                   ? (x) \
                                   : ((x) < 10 * ONE_M \
                                      ? (x) / ONE_K \
                                      : (x) / ONE_M)))

/* Give the unit character that goes with SCALE (X):
   - ' ' if X is strictly lower than 10 K (in base 2);
   - 'k' if X is at least 10 K but strictly lower than 10 M;
   - 'M' if X is at least 10 M.  */
#define STAT_LABEL(x) ((x) < 10 * ONE_K ? ' ' : ((x) < 10 * ONE_M ? 'k' : 'M'))

/* Expand to two comma-separated arguments for a printf-like call:
   SIZE scaled to a multiple of 1K or 1M (in base 2), followed by the
   matching unit character (' ', 'k' or 'M').  */
#define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
892 /* Dump statistics to stderr about the memory usage of the line_table
893 set of line maps. This also displays some statistics about macro
894 expansion. */
896 void
897 dump_line_table_statistics (void)
899 struct linemap_stats s;
900 long total_used_map_size,
901 macro_maps_size,
902 total_allocated_map_size;
904 memset (&s, 0, sizeof (s));
906 linemap_get_statistics (line_table, &s);
908 macro_maps_size = s.macro_maps_used_size
909 + s.macro_maps_locations_size;
911 total_allocated_map_size = s.ordinary_maps_allocated_size
912 + s.macro_maps_allocated_size
913 + s.macro_maps_locations_size;
915 total_used_map_size = s.ordinary_maps_used_size
916 + s.macro_maps_used_size
917 + s.macro_maps_locations_size;
919 fprintf (stderr, "Number of expanded macros: %5ld\n",
920 s.num_expanded_macros);
921 if (s.num_expanded_macros != 0)
922 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
923 s.num_macro_tokens / s.num_expanded_macros);
924 fprintf (stderr,
925 "\nLine Table allocations during the "
926 "compilation process\n");
927 fprintf (stderr, "Number of ordinary maps used: %5ld%c\n",
928 SCALE (s.num_ordinary_maps_used),
929 STAT_LABEL (s.num_ordinary_maps_used));
930 fprintf (stderr, "Ordinary map used size: %5ld%c\n",
931 SCALE (s.ordinary_maps_used_size),
932 STAT_LABEL (s.ordinary_maps_used_size));
933 fprintf (stderr, "Number of ordinary maps allocated: %5ld%c\n",
934 SCALE (s.num_ordinary_maps_allocated),
935 STAT_LABEL (s.num_ordinary_maps_allocated));
936 fprintf (stderr, "Ordinary maps allocated size: %5ld%c\n",
937 SCALE (s.ordinary_maps_allocated_size),
938 STAT_LABEL (s.ordinary_maps_allocated_size));
939 fprintf (stderr, "Number of macro maps used: %5ld%c\n",
940 SCALE (s.num_macro_maps_used),
941 STAT_LABEL (s.num_macro_maps_used));
942 fprintf (stderr, "Macro maps used size: %5ld%c\n",
943 SCALE (s.macro_maps_used_size),
944 STAT_LABEL (s.macro_maps_used_size));
945 fprintf (stderr, "Macro maps locations size: %5ld%c\n",
946 SCALE (s.macro_maps_locations_size),
947 STAT_LABEL (s.macro_maps_locations_size));
948 fprintf (stderr, "Macro maps size: %5ld%c\n",
949 SCALE (macro_maps_size),
950 STAT_LABEL (macro_maps_size));
951 fprintf (stderr, "Duplicated maps locations size: %5ld%c\n",
952 SCALE (s.duplicated_macro_maps_locations_size),
953 STAT_LABEL (s.duplicated_macro_maps_locations_size));
954 fprintf (stderr, "Total allocated maps size: %5ld%c\n",
955 SCALE (total_allocated_map_size),
956 STAT_LABEL (total_allocated_map_size));
957 fprintf (stderr, "Total used maps size: %5ld%c\n",
958 SCALE (total_used_map_size),
959 STAT_LABEL (total_used_map_size));
960 fprintf (stderr, "Ad-hoc table size: %5ld%c\n",
961 SCALE (s.adhoc_table_size),
962 STAT_LABEL (s.adhoc_table_size));
963 fprintf (stderr, "Ad-hoc table entries used: %5ld\n",
964 s.adhoc_table_entries_used);
965 fprintf (stderr, "optimized_ranges: %i\n",
966 line_table->num_optimized_ranges);
967 fprintf (stderr, "unoptimized_ranges: %i\n",
968 line_table->num_unoptimized_ranges);
970 fprintf (stderr, "\n");
973 /* Get location one beyond the final location in ordinary map IDX. */
975 static source_location
976 get_end_location (struct line_maps *set, unsigned int idx)
978 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
979 return set->highest_location;
981 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
982 return MAP_START_LOCATION (next_map);
/* Helper function for write_digit_row.
   Write the last decimal digit of DIGIT to STREAM as one character.
   For a negative DIGIT the last digit of its magnitude is written;
   a plain "'0' + digit % 10" would give a non-digit character
   there, because the remainder is then negative.  */

static void
write_digit (FILE *stream, int digit)
{
  int units = digit % 10;
  /* UNITS lies in [-9, 9], so negating it cannot overflow.  */
  if (units < 0)
    units = -units;
  fputc ('0' + units, stream);
}
993 /* Helper function for dump_location_info.
994 Write a row of numbers to STREAM, numbering a source line,
995 giving the units, tens, hundreds etc of the column number. */
997 static void
998 write_digit_row (FILE *stream, int indent,
999 const line_map_ordinary *map,
1000 source_location loc, int max_col, int divisor)
1002 fprintf (stream, "%*c", indent, ' ');
1003 fprintf (stream, "|");
1004 for (int column = 1; column < max_col; column++)
1006 source_location column_loc = loc + (column << map->m_range_bits);
1007 write_digit (stream, column_loc / divisor);
1009 fprintf (stream, "\n");
1012 /* Write a half-closed (START) / half-open (END) interval of
1013 source_location to STREAM. */
1015 static void
1016 dump_location_range (FILE *stream,
1017 source_location start, source_location end)
1019 fprintf (stream,
1020 " source_location interval: %u <= loc < %u\n",
1021 start, end);
1024 /* Write a labelled description of a half-closed (START) / half-open (END)
1025 interval of source_location to STREAM. */
1027 static void
1028 dump_labelled_location_range (FILE *stream,
1029 const char *name,
1030 source_location start, source_location end)
1032 fprintf (stream, "%s\n", name);
1033 dump_location_range (stream, start, end);
1034 fprintf (stream, "\n");
1037 /* Write a visualization of the locations in the line_table to STREAM. */
1039 void
1040 dump_location_info (FILE *stream)
1042 /* Visualize the reserved locations. */
1043 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1044 0, RESERVED_LOCATION_COUNT);
1046 /* Visualize the ordinary line_map instances, rendering the sources. */
1047 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1049 source_location end_location = get_end_location (line_table, idx);
1050 /* half-closed: doesn't include this one. */
1052 const line_map_ordinary *map
1053 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1054 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1055 dump_location_range (stream,
1056 MAP_START_LOCATION (map), end_location);
1057 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1058 fprintf (stream, " starting at line: %i\n",
1059 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1060 fprintf (stream, " column and range bits: %i\n",
1061 map->m_column_and_range_bits);
1062 fprintf (stream, " column bits: %i\n",
1063 map->m_column_and_range_bits - map->m_range_bits);
1064 fprintf (stream, " range bits: %i\n",
1065 map->m_range_bits);
1067 /* Render the span of source lines that this "map" covers. */
1068 for (source_location loc = MAP_START_LOCATION (map);
1069 loc < end_location;
1070 loc += (1 << map->m_range_bits) )
1072 gcc_assert (pure_location_p (line_table, loc) );
1074 expanded_location exploc
1075 = linemap_expand_location (line_table, map, loc);
1077 if (0 == exploc.column)
1079 /* Beginning of a new source line: draw the line. */
1081 int line_size;
1082 const char *line_text = location_get_source_line (exploc.file,
1083 exploc.line,
1084 &line_size);
1085 if (!line_text)
1086 break;
1087 fprintf (stream,
1088 "%s:%3i|loc:%5i|%.*s\n",
1089 exploc.file, exploc.line,
1090 loc,
1091 line_size, line_text);
1093 /* "loc" is at column 0, which means "the whole line".
1094 Render the locations *within* the line, by underlining
1095 it, showing the source_location numeric values
1096 at each column. */
1097 int max_col = (1 << map->m_column_and_range_bits) - 1;
1098 if (max_col > line_size)
1099 max_col = line_size + 1;
1101 int indent = 14 + strlen (exploc.file);
1103 /* Thousands. */
1104 if (end_location > 999)
1105 write_digit_row (stream, indent, map, loc, max_col, 1000);
1107 /* Hundreds. */
1108 if (end_location > 99)
1109 write_digit_row (stream, indent, map, loc, max_col, 100);
1111 /* Tens. */
1112 write_digit_row (stream, indent, map, loc, max_col, 10);
1114 /* Units. */
1115 write_digit_row (stream, indent, map, loc, max_col, 1);
1118 fprintf (stream, "\n");
1121 /* Visualize unallocated values. */
1122 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1123 line_table->highest_location,
1124 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1126 /* Visualize the macro line_map instances, rendering the sources. */
1127 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1129 /* Each macro map that is allocated owns source_location values
1130 that are *lower* that the one before them.
1131 Hence it's meaningful to view them either in order of ascending
1132 source locations, or in order of ascending macro map index. */
1133 const bool ascending_source_locations = true;
1134 unsigned int idx = (ascending_source_locations
1135 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1136 : i);
1137 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1138 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1139 idx,
1140 linemap_map_get_macro_name (map),
1141 MACRO_MAP_NUM_MACRO_TOKENS (map));
1142 dump_location_range (stream,
1143 map->start_location,
1144 (map->start_location
1145 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1146 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1147 "expansion point is location %i",
1148 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1149 fprintf (stream, " map->start_location: %u\n",
1150 map->start_location);
1152 fprintf (stream, " macro_locations:\n");
1153 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1155 source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1156 source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1158 /* linemap_add_macro_token encodes token numbers in an expansion
1159 by putting them after MAP_START_LOCATION. */
1161 /* I'm typically seeing 4 uninitialized entries at the end of
1162 0xafafafaf.
1163 This appears to be due to macro.c:replace_args
1164 adding 2 extra args for padding tokens; presumably there may
1165 be a leading and/or trailing padding token injected,
1166 each for 2 more location slots.
1167 This would explain there being up to 4 source_locations slots
1168 that may be uninitialized. */
1170 fprintf (stream, " %u: %u, %u\n",
1174 if (x == y)
1176 if (x < MAP_START_LOCATION (map))
1177 inform (x, "token %u has x-location == y-location == %u", i, x);
1178 else
1179 fprintf (stream,
1180 "x-location == y-location == %u encodes token # %u\n",
1181 x, x - MAP_START_LOCATION (map));
1183 else
1185 inform (x, "token %u has x-location == %u", i, x);
1186 inform (x, "token %u has y-location == %u", i, y);
1189 fprintf (stream, "\n");
1192 /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1193 macro map, presumably due to an off-by-one error somewhere
1194 between the logic in linemap_enter_macro and
1195 LINEMAPS_MACRO_LOWEST_LOCATION. */
1196 dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1197 MAX_SOURCE_LOCATION,
1198 MAX_SOURCE_LOCATION + 1);
1200 /* Visualize ad-hoc values. */
1201 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1202 MAX_SOURCE_LOCATION + 1, UINT_MAX);
1205 /* string_concat's constructor. */
1207 string_concat::string_concat (int num, location_t *locs)
1208 : m_num (num)
1210 m_locs = ggc_vec_alloc <location_t> (num);
1211 for (int i = 0; i < num; i++)
1212 m_locs[i] = locs[i];
1215 /* string_concat_db's constructor. */
1217 string_concat_db::string_concat_db ()
1219 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1222 /* Record that a string concatenation occurred, covering NUM
1223 string literal tokens. LOCS is an array of size NUM, containing the
1224 locations of the tokens. A copy of LOCS is taken. */
1226 void
1227 string_concat_db::record_string_concatenation (int num, location_t *locs)
1229 gcc_assert (num > 1);
1230 gcc_assert (locs);
1232 location_t key_loc = get_key_loc (locs[0]);
1234 string_concat *concat
1235 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1236 m_table->put (key_loc, concat);
1239 /* Determine if LOC was the location of the the initial token of a
1240 concatenation of string literal tokens.
1241 If so, *OUT_NUM is written to with the number of tokens, and
1242 *OUT_LOCS with the location of an array of locations of the
1243 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1244 storage owned by the string_concat_db.
1245 Otherwise, return false. */
1247 bool
1248 string_concat_db::get_string_concatenation (location_t loc,
1249 int *out_num,
1250 location_t **out_locs)
1252 gcc_assert (out_num);
1253 gcc_assert (out_locs);
1255 location_t key_loc = get_key_loc (loc);
1257 string_concat **concat = m_table->get (key_loc);
1258 if (!concat)
1259 return false;
1261 *out_num = (*concat)->m_num;
1262 *out_locs =(*concat)->m_locs;
1263 return true;
1266 /* Internal function. Canonicalize LOC into a form suitable for
1267 use as a key within the database, stripping away macro expansion,
1268 ad-hoc information, and range information, using the location of
1269 the start of LOC within an ordinary linemap. */
1271 location_t
1272 string_concat_db::get_key_loc (location_t loc)
1274 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1275 NULL);
1277 loc = get_range_from_loc (line_table, loc).m_start;
1279 return loc;
1282 /* Helper class for use within get_substring_ranges_for_loc.
1283 An vec of cpp_string with responsibility for releasing all of the
1284 str->text for each str in the vector. */
1286 class auto_cpp_string_vec : public auto_vec <cpp_string>
1288 public:
1289 auto_cpp_string_vec (int alloc)
1290 : auto_vec <cpp_string> (alloc) {}
1292 ~auto_cpp_string_vec ()
1294 /* Clean up the copies within this vec. */
1295 int i;
1296 cpp_string *str;
1297 FOR_EACH_VEC_ELT (*this, i, str)
1298 free (const_cast <unsigned char *> (str->text));
1302 /* Attempt to populate RANGES with source location information on the
1303 individual characters within the string literal found at STRLOC.
1304 If CONCATS is non-NULL, then any string literals that the token at
1305 STRLOC was concatenated with are also added to RANGES.
1307 Return NULL if successful, or an error message if any errors occurred (in
1308 which case RANGES may be only partially populated and should not
1309 be used).
1311 This is implemented by re-parsing the relevant source line(s). */
1313 static const char *
1314 get_substring_ranges_for_loc (cpp_reader *pfile,
1315 string_concat_db *concats,
1316 location_t strloc,
1317 enum cpp_ttype type,
1318 cpp_substring_ranges &ranges)
1320 gcc_assert (pfile);
1322 if (strloc == UNKNOWN_LOCATION)
1323 return "unknown location";
1325 /* Reparsing the strings requires accurate location information.
1326 If -ftrack-macro-expansion has been overridden from its default
1327 of 2, then we might have a location of a macro expansion point,
1328 rather than the location of the literal itself.
1329 Avoid this by requiring that we have full macro expansion tracking
1330 for substring locations to be available. */
1331 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1332 return "track_macro_expansion != 2";
1334 /* If #line or # 44 "file"-style directives are present, then there's
1335 no guarantee that the line numbers we have can be used to locate
1336 the strings. For example, we might have a .i file with # directives
1337 pointing back to lines within a .c file, but the .c file might
1338 have been edited since the .i file was created.
1339 In such a case, the safest course is to disable on-demand substring
1340 locations. */
1341 if (line_table->seen_line_directive)
1342 return "seen line directive";
1344 /* If string concatenation has occurred at STRLOC, get the locations
1345 of all of the literal tokens making up the compound string.
1346 Otherwise, just use STRLOC. */
1347 int num_locs = 1;
1348 location_t *strlocs = &strloc;
1349 if (concats)
1350 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1352 auto_cpp_string_vec strs (num_locs);
1353 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1354 for (int i = 0; i < num_locs; i++)
1356 /* Get range of strloc. We will use it to locate the start and finish
1357 of the literal token within the line. */
1358 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1360 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1361 /* If the string is within a macro expansion, we can't get at the
1362 end location. */
1363 return "macro expansion";
1365 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1366 /* If so, we can't reliably determine where the token started within
1367 its line. */
1368 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1370 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1371 /* If so, we can't reliably determine where the token finished within
1372 its line. */
1373 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1375 expanded_location start
1376 = expand_location_to_spelling_point (src_range.m_start);
1377 expanded_location finish
1378 = expand_location_to_spelling_point (src_range.m_finish);
1379 if (start.file != finish.file)
1380 return "range endpoints are in different files";
1381 if (start.line != finish.line)
1382 return "range endpoints are on different lines";
1383 if (start.column > finish.column)
1384 return "range endpoints are reversed";
1386 int line_width;
1387 const char *line = location_get_source_line (start.file, start.line,
1388 &line_width);
1389 if (line == NULL)
1390 return "unable to read source line";
1392 /* Determine the location of the literal (including quotes
1393 and leading prefix chars, such as the 'u' in a u""
1394 token). */
1395 const char *literal = line + start.column - 1;
1396 int literal_length = finish.column - start.column + 1;
1398 gcc_assert (line_width >= (start.column - 1 + literal_length));
1399 cpp_string from;
1400 from.len = literal_length;
1401 /* Make a copy of the literal, to avoid having to rely on
1402 the lifetime of the copy of the line within the cache.
1403 This will be released by the auto_cpp_string_vec dtor. */
1404 from.text = XDUPVEC (unsigned char, literal, literal_length);
1405 strs.safe_push (from);
1407 /* For very long lines, a new linemap could have started
1408 halfway through the token.
1409 Ensure that the loc_reader uses the linemap of the
1410 *end* of the token for its start location. */
1411 const line_map_ordinary *final_ord_map;
1412 linemap_resolve_location (line_table, src_range.m_finish,
1413 LRK_MACRO_EXPANSION_POINT, &final_ord_map);
1414 location_t start_loc
1415 = linemap_position_for_line_and_column (line_table, final_ord_map,
1416 start.line, start.column);
1418 cpp_string_location_reader loc_reader (start_loc, line_table);
1419 loc_readers.safe_push (loc_reader);
1422 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1423 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1424 loc_readers.address (),
1425 num_locs, &ranges, type);
1426 if (err)
1427 return err;
1429 /* Success: "ranges" should now contain information on the string. */
1430 return NULL;
1433 /* Attempt to populate *OUT_LOC with source location information on the
1434 given characters within the string literal found at STRLOC.
1435 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1436 character set.
1438 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1439 and string literal "012345\n789"
1440 *OUT_LOC is written to with:
1441 "012345\n789"
1442 ~^~~~~
1444 If CONCATS is non-NULL, then any string literals that the token at
1445 STRLOC was concatenated with are also considered.
1447 This is implemented by re-parsing the relevant source line(s).
1449 Return NULL if successful, or an error message if any errors occurred.
1450 Error messages are intended for GCC developers (to help debugging) rather
1451 than for end-users. */
1453 const char *
1454 get_source_location_for_substring (cpp_reader *pfile,
1455 string_concat_db *concats,
1456 location_t strloc,
1457 enum cpp_ttype type,
1458 int caret_idx, int start_idx, int end_idx,
1459 source_location *out_loc)
1461 gcc_checking_assert (caret_idx >= 0);
1462 gcc_checking_assert (start_idx >= 0);
1463 gcc_checking_assert (end_idx >= 0);
1464 gcc_assert (out_loc);
1466 cpp_substring_ranges ranges;
1467 const char *err
1468 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1469 if (err)
1470 return err;
1472 if (caret_idx >= ranges.get_num_ranges ())
1473 return "caret_idx out of range";
1474 if (start_idx >= ranges.get_num_ranges ())
1475 return "start_idx out of range";
1476 if (end_idx >= ranges.get_num_ranges ())
1477 return "end_idx out of range";
1479 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1480 ranges.get_range (start_idx).m_start,
1481 ranges.get_range (end_idx).m_finish);
1482 return NULL;
1485 #if CHECKING_P
1487 namespace selftest {
1489 /* Selftests of location handling. */
1491 /* Attempt to populate *OUT_RANGE with source location information on the
1492 given character within the string literal found at STRLOC.
1493 CHAR_IDX refers to an offset within the execution character set.
1494 If CONCATS is non-NULL, then any string literals that the token at
1495 STRLOC was concatenated with are also considered.
1497 This is implemented by re-parsing the relevant source line(s).
1499 Return NULL if successful, or an error message if any errors occurred.
1500 Error messages are intended for GCC developers (to help debugging) rather
1501 than for end-users. */
1503 static const char *
1504 get_source_range_for_char (cpp_reader *pfile,
1505 string_concat_db *concats,
1506 location_t strloc,
1507 enum cpp_ttype type,
1508 int char_idx,
1509 source_range *out_range)
1511 gcc_checking_assert (char_idx >= 0);
1512 gcc_assert (out_range);
1514 cpp_substring_ranges ranges;
1515 const char *err
1516 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1517 if (err)
1518 return err;
1520 if (char_idx >= ranges.get_num_ranges ())
1521 return "char_idx out of range";
1523 *out_range = ranges.get_range (char_idx);
1524 return NULL;
1527 /* As get_source_range_for_char, but write to *OUT the number
1528 of ranges that are available. */
1530 static const char *
1531 get_num_source_ranges_for_substring (cpp_reader *pfile,
1532 string_concat_db *concats,
1533 location_t strloc,
1534 enum cpp_ttype type,
1535 int *out)
1537 gcc_assert (out);
1539 cpp_substring_ranges ranges;
1540 const char *err
1541 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1543 if (err)
1544 return err;
1546 *out = ranges.get_num_ranges ();
1547 return NULL;
1550 /* Selftests of location handling. */
1552 /* Helper function for verifying location data: when location_t
1553 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1554 as having column 0. */
1556 static bool
1557 should_have_column_data_p (location_t loc)
1559 if (IS_ADHOC_LOC (loc))
1560 loc = get_location_from_adhoc_loc (line_table, loc);
1561 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1562 return false;
1563 return true;
1566 /* Selftest for should_have_column_data_p. */
1568 static void
1569 test_should_have_column_data_p ()
1571 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1572 ASSERT_TRUE
1573 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1574 ASSERT_FALSE
1575 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1578 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1579 on LOC. */
1581 static void
1582 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1583 location_t loc)
1585 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1586 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1587 /* If location_t values are sufficiently high, then column numbers
1588 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1589 When close to the threshold, column numbers *may* be present: if
1590 the final linemap before the threshold contains a line that straddles
1591 the threshold, locations in that line have column information. */
1592 if (should_have_column_data_p (loc))
1593 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for source_location/location_t beyond which line-map.c
     changes behavior (disabling of the range-packing optimization,
     disabling of column-tracking).  We can exercise these by starting
     the line_table at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix: the default_range_bits to use, and the location value at
   which to start the line_table (zero meaning "leave it alone").  */

struct line_table_case
{
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  int m_default_range_bits;
  int m_base_location;
};
1623 /* Constructor. Store the old value of line_table, and create a new
1624 one, using sane defaults. */
1626 line_table_test::line_table_test ()
1628 gcc_assert (saved_line_table == NULL);
1629 saved_line_table = line_table;
1630 line_table = ggc_alloc<line_maps> ();
1631 linemap_init (line_table, BUILTINS_LOCATION);
1632 gcc_assert (saved_line_table->reallocator);
1633 line_table->reallocator = saved_line_table->reallocator;
1634 gcc_assert (saved_line_table->round_alloc_size);
1635 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1636 line_table->default_range_bits = 0;
1639 /* Constructor. Store the old value of line_table, and create a new
1640 one, using the sitation described in CASE_. */
1642 line_table_test::line_table_test (const line_table_case &case_)
1644 gcc_assert (saved_line_table == NULL);
1645 saved_line_table = line_table;
1646 line_table = ggc_alloc<line_maps> ();
1647 linemap_init (line_table, BUILTINS_LOCATION);
1648 gcc_assert (saved_line_table->reallocator);
1649 line_table->reallocator = saved_line_table->reallocator;
1650 gcc_assert (saved_line_table->round_alloc_size);
1651 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1652 line_table->default_range_bits = case_.m_default_range_bits;
1653 if (case_.m_base_location)
1655 line_table->highest_location = case_.m_base_location;
1656 line_table->highest_line = case_.m_base_location;
1660 /* Destructor. Restore the old value of line_table. */
1662 line_table_test::~line_table_test ()
1664 gcc_assert (saved_line_table != NULL);
1665 line_table = saved_line_table;
1666 saved_line_table = NULL;
1669 /* Verify basic operation of ordinary linemaps. */
1671 static void
1672 test_accessing_ordinary_linemaps (const line_table_case &case_)
1674 line_table_test ltt (case_);
1676 /* Build a simple linemap describing some locations. */
1677 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1679 linemap_line_start (line_table, 1, 100);
1680 location_t loc_a = linemap_position_for_column (line_table, 1);
1681 location_t loc_b = linemap_position_for_column (line_table, 23);
1683 linemap_line_start (line_table, 2, 100);
1684 location_t loc_c = linemap_position_for_column (line_table, 1);
1685 location_t loc_d = linemap_position_for_column (line_table, 17);
1687 /* Example of a very long line. */
1688 linemap_line_start (line_table, 3, 2000);
1689 location_t loc_e = linemap_position_for_column (line_table, 700);
1691 /* Transitioning back to a short line. */
1692 linemap_line_start (line_table, 4, 0);
1693 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1695 if (should_have_column_data_p (loc_back_to_short))
1697 /* Verify that we switched to short lines in the linemap. */
1698 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1699 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1702 /* Example of a line that will eventually be seen to be longer
1703 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1704 below that. */
1705 linemap_line_start (line_table, 5, 2000);
1707 location_t loc_start_of_very_long_line
1708 = linemap_position_for_column (line_table, 2000);
1709 location_t loc_too_wide
1710 = linemap_position_for_column (line_table, 4097);
1711 location_t loc_too_wide_2
1712 = linemap_position_for_column (line_table, 4098);
1714 /* ...and back to a sane line length. */
1715 linemap_line_start (line_table, 6, 100);
1716 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1718 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1720 /* Multiple files. */
1721 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1722 linemap_line_start (line_table, 1, 200);
1723 location_t loc_f = linemap_position_for_column (line_table, 150);
1724 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1726 /* Verify that we can recover the location info. */
1727 assert_loceq ("foo.c", 1, 1, loc_a);
1728 assert_loceq ("foo.c", 1, 23, loc_b);
1729 assert_loceq ("foo.c", 2, 1, loc_c);
1730 assert_loceq ("foo.c", 2, 17, loc_d);
1731 assert_loceq ("foo.c", 3, 700, loc_e);
1732 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1734 /* In the very wide line, the initial location should be fully tracked. */
1735 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1736 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1737 be disabled. */
1738 assert_loceq ("foo.c", 5, 0, loc_too_wide);
1739 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1740 /*...and column-tracking should be re-enabled for subsequent lines. */
1741 assert_loceq ("foo.c", 6, 10, loc_sane_again);
1743 assert_loceq ("bar.c", 1, 150, loc_f);
1745 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1746 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1748 /* Verify using make_location to build a range, and extracting data
1749 back from it. */
1750 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1751 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1752 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1753 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1754 ASSERT_EQ (loc_b, src_range.m_start);
1755 ASSERT_EQ (loc_d, src_range.m_finish);
1758 /* Verify various properties of UNKNOWN_LOCATION. */
1760 static void
1761 test_unknown_location ()
1763 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1764 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1765 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1768 /* Verify various properties of BUILTINS_LOCATION. */
1770 static void
1771 test_builtins ()
1773 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1774 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1777 /* Regression test for make_location.
1778 Ensure that we use pure locations for the start/finish of the range,
1779 rather than storing a packed or ad-hoc range as the start/finish. */
1781 static void
1782 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1784 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1785 with C++ frontend.
1786 ....................0000000001111111111222.
1787 ....................1234567890123456789012. */
1788 const char *content = " r += !aaa == bbb;\n";
1789 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1790 line_table_test ltt (case_);
1791 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1793 const location_t c11 = linemap_position_for_column (line_table, 11);
1794 const location_t c12 = linemap_position_for_column (line_table, 12);
1795 const location_t c13 = linemap_position_for_column (line_table, 13);
1796 const location_t c14 = linemap_position_for_column (line_table, 14);
1797 const location_t c21 = linemap_position_for_column (line_table, 21);
1799 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1800 return;
1802 /* Use column 13 for the caret location, arbitrarily, to verify that we
1803 handle start != caret. */
1804 const location_t aaa = make_location (c13, c12, c14);
1805 ASSERT_EQ (c13, get_pure_location (aaa));
1806 ASSERT_EQ (c12, get_start (aaa));
1807 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1808 ASSERT_EQ (c14, get_finish (aaa));
1809 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1811 /* Make a location using a location with a range as the start-point. */
1812 const location_t not_aaa = make_location (c11, aaa, c14);
1813 ASSERT_EQ (c11, get_pure_location (not_aaa));
1814 /* It should use the start location of the range, not store the range
1815 itself. */
1816 ASSERT_EQ (c12, get_start (not_aaa));
1817 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1818 ASSERT_EQ (c14, get_finish (not_aaa));
1819 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1821 /* Similarly, make a location with a range as the end-point. */
1822 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1823 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1824 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1825 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1826 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1827 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1828 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1829 /* It should use the finish location of the range, not store the range
1830 itself. */
1831 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1832 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1833 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1834 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1835 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1838 /* Verify reading of input files (e.g. for caret-based diagnostics). */
1840 static void
1841 test_reading_source_line ()
1843 /* Create a tempfile and write some text to it. */
1844 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1845 "01234567890123456789\n"
1846 "This is the test text\n"
1847 "This is the 3rd line");
1849 /* Read back a specific line from the tempfile. */
1850 int line_size;
1851 const char *source_line = location_get_source_line (tmp.get_filename (),
1852 3, &line_size);
1853 ASSERT_TRUE (source_line != NULL);
1854 ASSERT_EQ (20, line_size);
1855 ASSERT_TRUE (!strncmp ("This is the 3rd line",
1856 source_line, line_size));
1858 source_line = location_get_source_line (tmp.get_filename (),
1859 2, &line_size);
1860 ASSERT_TRUE (source_line != NULL);
1861 ASSERT_EQ (21, line_size);
1862 ASSERT_TRUE (!strncmp ("This is the test text",
1863 source_line, line_size));
1865 source_line = location_get_source_line (tmp.get_filename (),
1866 4, &line_size);
1867 ASSERT_TRUE (source_line == NULL);
1870 /* Tests of lexing. */
/* Assert that cpp_token_as_text, applied to token TOK of PARSER,
   yields a string equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));   \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);          \
  SELFTEST_END_STMT
1881 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1882 and ranges from EXP_START_COL to EXP_FINISH_COL.
1883 Use LOC as the effective location of the selftest. */
1885 static void
1886 assert_token_loc_eq (const location &loc,
1887 const cpp_token *tok,
1888 const char *exp_filename, int exp_linenum,
1889 int exp_start_col, int exp_finish_col)
1891 location_t tok_loc = tok->src_loc;
1892 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1893 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1895 /* If location_t values are sufficiently high, then column numbers
1896 will be unavailable. */
1897 if (!should_have_column_data_p (tok_loc))
1898 return;
1900 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1901 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1902 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1903 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
/* Wrapper around assert_token_loc_eq for verifying TOK->src_loc, with
   SELFTEST_LOCATION supplied as the effective location of the
   selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,            \
                            EXP_START_COL, EXP_FINISH_COL)              \
  assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME),       \
                       (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1914 /* Test of lexing a file using libcpp, verifying tokens and their
1915 location information. */
1917 static void
1918 test_lexer (const line_table_case &case_)
1920 /* Create a tempfile and write some text to it. */
1921 const char *content =
1922 /*00000000011111111112222222222333333.3333444444444.455555555556
1923 12345678901234567890123456789012345.6789012345678.901234567890. */
1924 ("test_name /* c-style comment */\n"
1925 " \"test literal\"\n"
1926 " // test c++-style comment\n"
1927 " 42\n");
1928 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1930 line_table_test ltt (case_);
1932 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1934 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1935 ASSERT_NE (fname, NULL);
1937 /* Verify that we get the expected tokens back, with the correct
1938 location information. */
1940 location_t loc;
1941 const cpp_token *tok;
1942 tok = cpp_get_token_with_location (parser, &loc);
1943 ASSERT_NE (tok, NULL);
1944 ASSERT_EQ (tok->type, CPP_NAME);
1945 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
1946 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
1948 tok = cpp_get_token_with_location (parser, &loc);
1949 ASSERT_NE (tok, NULL);
1950 ASSERT_EQ (tok->type, CPP_STRING);
1951 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
1952 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
1954 tok = cpp_get_token_with_location (parser, &loc);
1955 ASSERT_NE (tok, NULL);
1956 ASSERT_EQ (tok->type, CPP_NUMBER);
1957 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
1958 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
1960 tok = cpp_get_token_with_location (parser, &loc);
1961 ASSERT_NE (tok, NULL);
1962 ASSERT_EQ (tok->type, CPP_EOF);
1964 cpp_finish (parser, NULL);
1965 cpp_destroy (parser);
/* Forward decls.  lexer_test_options::apply takes a lexer_test &, and
   lexer_test's constructor takes a lexer_test_options *, so both names
   must be visible before either definition.  */

struct lexer_test;
class lexer_test_options;
1973 /* A class for specifying options of a lexer_test.
1974 The "apply" vfunc is called during the lexer_test constructor. */
1976 class lexer_test_options
1978 public:
1979 virtual void apply (lexer_test &) = 0;
1982 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
1983 in its dtor.
1985 This is needed by struct lexer_test to ensure that the cleanup of the
1986 cpp_reader happens *after* the cleanup of the temp_source_file. */
1988 class cpp_reader_ptr
1990 public:
1991 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
1993 ~cpp_reader_ptr ()
1995 cpp_finish (m_ptr, NULL);
1996 cpp_destroy (m_ptr);
1999 operator cpp_reader * () const { return m_ptr; }
2001 private:
2002 cpp_reader *m_ptr;
2005 /* A struct for writing lexer tests. */
2007 struct lexer_test
2009 lexer_test (const line_table_case &case_, const char *content,
2010 lexer_test_options *options);
2011 ~lexer_test ();
2013 const cpp_token *get_token ();
2015 /* The ordering of these fields matters.
2016 The line_table_test must be first, since the cpp_reader_ptr
2017 uses it.
2018 The cpp_reader must be cleaned up *after* the temp_source_file
2019 since the filenames in input.c's input cache are owned by the
2020 cpp_reader; in particular, when ~temp_source_file evicts the
2021 filename the filenames must still be alive. */
2022 line_table_test m_ltt;
2023 cpp_reader_ptr m_parser;
2024 temp_source_file m_tempfile;
2025 string_concat_db m_concats;
2026 bool m_implicitly_expect_EOF;
2029 /* Use an EBCDIC encoding for the execution charset, specifically
2030 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2032 This exercises iconv integration within libcpp.
2033 Not every build of iconv supports the given charset,
2034 so we need to flag this error and handle it gracefully. */
2036 class ebcdic_execution_charset : public lexer_test_options
2038 public:
2039 ebcdic_execution_charset () : m_num_iconv_errors (0)
2041 gcc_assert (s_singleton == NULL);
2042 s_singleton = this;
2044 ~ebcdic_execution_charset ()
2046 gcc_assert (s_singleton == this);
2047 s_singleton = NULL;
2050 void apply (lexer_test &test) FINAL OVERRIDE
2052 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2053 cpp_opts->narrow_charset = "IBM1047";
2055 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2056 callbacks->error = on_error;
2059 static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
2060 int level ATTRIBUTE_UNUSED,
2061 int reason ATTRIBUTE_UNUSED,
2062 rich_location *richloc ATTRIBUTE_UNUSED,
2063 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2064 ATTRIBUTE_FPTR_PRINTF(5,0)
2066 gcc_assert (s_singleton);
2067 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2068 const char *msg = "conversion from %s to %s not supported by iconv";
2069 #ifdef ENABLE_NLS
2070 msg = dgettext ("cpplib", msg);
2071 #endif
2072 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2073 when the local iconv build doesn't support the conversion. */
2074 if (strcmp (msgid, msg) == 0)
2076 s_singleton->m_num_iconv_errors++;
2077 return true;
2080 /* Otherwise, we have an unexpected error. */
2081 abort ();
2084 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2086 private:
2087 static ebcdic_execution_charset *s_singleton;
2088 int m_num_iconv_errors;
2091 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2093 /* A lexer_test_options subclass that records a list of error
2094 messages emitted by the lexer. */
2096 class lexer_error_sink : public lexer_test_options
2098 public:
2099 lexer_error_sink ()
2101 gcc_assert (s_singleton == NULL);
2102 s_singleton = this;
2104 ~lexer_error_sink ()
2106 gcc_assert (s_singleton == this);
2107 s_singleton = NULL;
2109 int i;
2110 char *str;
2111 FOR_EACH_VEC_ELT (m_errors, i, str)
2112 free (str);
2115 void apply (lexer_test &test) FINAL OVERRIDE
2117 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2118 callbacks->error = on_error;
2121 static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
2122 int level ATTRIBUTE_UNUSED,
2123 int reason ATTRIBUTE_UNUSED,
2124 rich_location *richloc ATTRIBUTE_UNUSED,
2125 const char *msgid, va_list *ap)
2126 ATTRIBUTE_FPTR_PRINTF(5,0)
2128 char *msg = xvasprintf (msgid, *ap);
2129 s_singleton->m_errors.safe_push (msg);
2130 return true;
2133 auto_vec<char *> m_errors;
2135 private:
2136 static lexer_error_sink *s_singleton;
2139 lexer_error_sink *lexer_error_sink::s_singleton;
2141 /* Constructor. Override line_table with a new instance based on CASE_,
2142 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2143 start parsing the tempfile. */
2145 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2146 lexer_test_options *options)
2147 : m_ltt (case_),
2148 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2149 /* Create a tempfile and write the text to it. */
2150 m_tempfile (SELFTEST_LOCATION, ".c", content),
2151 m_concats (),
2152 m_implicitly_expect_EOF (true)
2154 if (options)
2155 options->apply (*this);
2157 cpp_init_iconv (m_parser);
2159 /* Parse the file. */
2160 const char *fname = cpp_read_main_file (m_parser,
2161 m_tempfile.get_filename ());
2162 ASSERT_NE (fname, NULL);
2165 /* Destructor. By default, verify that the next token in m_parser is EOF. */
2167 lexer_test::~lexer_test ()
2169 location_t loc;
2170 const cpp_token *tok;
2172 if (m_implicitly_expect_EOF)
2174 tok = cpp_get_token_with_location (m_parser, &loc);
2175 ASSERT_NE (tok, NULL);
2176 ASSERT_EQ (tok->type, CPP_EOF);
2180 /* Get the next token from m_parser. */
2182 const cpp_token *
2183 lexer_test::get_token ()
2185 location_t loc;
2186 const cpp_token *tok;
2188 tok = cpp_get_token_with_location (m_parser, &loc);
2189 ASSERT_NE (tok, NULL);
2190 return tok;
2193 /* Verify that locations within string literals are correctly handled. */
2195 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2196 using the string concatenation database for TEST.
2198 Assert that the character at index IDX is on EXPECTED_LINE,
2199 and that it begins at column EXPECTED_START_COL and ends at
2200 EXPECTED_FINISH_COL (unless the locations are beyond
2201 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2202 columns). */
2204 static void
2205 assert_char_at_range (const location &loc,
2206 lexer_test& test,
2207 location_t strloc, enum cpp_ttype type, int idx,
2208 int expected_line, int expected_start_col,
2209 int expected_finish_col)
2211 cpp_reader *pfile = test.m_parser;
2212 string_concat_db *concats = &test.m_concats;
2214 source_range actual_range = source_range();
2215 const char *err
2216 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2217 &actual_range);
2218 if (should_have_column_data_p (strloc))
2219 ASSERT_EQ_AT (loc, NULL, err);
2220 else
2222 ASSERT_STREQ_AT (loc,
2223 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2224 err);
2225 return;
2228 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2229 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2230 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2231 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2233 if (should_have_column_data_p (actual_range.m_start))
2235 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2236 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2238 if (should_have_column_data_p (actual_range.m_finish))
2240 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2241 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
/* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
   the effective location of any errors.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
                             EXPECTED_START_COL, EXPECTED_FINISH_COL) \
  assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
                        (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
                        (EXPECTED_FINISH_COL))
2254 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2255 using the string concatenation database for TEST.
2257 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2259 static void
2260 assert_num_substring_ranges (const location &loc,
2261 lexer_test& test,
2262 location_t strloc,
2263 enum cpp_ttype type,
2264 int expected_num_ranges)
2266 cpp_reader *pfile = test.m_parser;
2267 string_concat_db *concats = &test.m_concats;
2269 int actual_num_ranges = -1;
2270 const char *err
2271 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2272 &actual_num_ranges);
2273 if (should_have_column_data_p (strloc))
2274 ASSERT_EQ_AT (loc, NULL, err);
2275 else
2277 ASSERT_STREQ_AT (loc,
2278 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2279 err);
2280 return;
2282 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
/* Macro for calling assert_num_substring_ranges, supplying
   SELFTEST_LOCATION for the effective location of any errors.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
                                    EXPECTED_NUM_RANGES) \
  assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
                               (TYPE), (EXPECTED_NUM_RANGES))
2294 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2295 returns an error (using the string concatenation database for TEST). */
2297 static void
2298 assert_has_no_substring_ranges (const location &loc,
2299 lexer_test& test,
2300 location_t strloc,
2301 enum cpp_ttype type,
2302 const char *expected_err)
2304 cpp_reader *pfile = test.m_parser;
2305 string_concat_db *concats = &test.m_concats;
2306 cpp_substring_ranges ranges;
2307 const char *actual_err
2308 = get_substring_ranges_for_loc (pfile, concats, strloc,
2309 type, ranges);
2310 if (should_have_column_data_p (strloc))
2311 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2312 else
2313 ASSERT_STREQ_AT (loc,
2314 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2315 actual_err);
/* Macro for calling assert_has_no_substring_ranges, supplying
   SELFTEST_LOCATION for the effective location of any errors.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
  assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
                                  (STRLOC), (TYPE), (ERR))
2322 /* Lex a simple string literal. Verify the substring location data, before
2323 and after running cpp_interpret_string on it. */
2325 static void
2326 test_lexer_string_locations_simple (const line_table_case &case_)
2328 /* Digits 0-9 (with 0 at column 10), the simple way.
2329 ....................000000000.11111111112.2222222223333333333
2330 ....................123456789.01234567890.1234567890123456789
2331 We add a trailing comment to ensure that we correctly locate
2332 the end of the string literal token. */
2333 const char *content = " \"0123456789\" /* not a string */\n";
2334 lexer_test test (case_, content, NULL);
2336 /* Verify that we get the expected token back, with the correct
2337 location information. */
2338 const cpp_token *tok = test.get_token ();
2339 ASSERT_EQ (tok->type, CPP_STRING);
2340 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2341 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2343 /* At this point in lexing, the quote characters are treated as part of
2344 the string (they are stripped off by cpp_interpret_string). */
2346 ASSERT_EQ (tok->val.str.len, 12);
2348 /* Verify that cpp_interpret_string works. */
2349 cpp_string dst_string;
2350 const enum cpp_ttype type = CPP_STRING;
2351 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2352 &dst_string, type);
2353 ASSERT_TRUE (result);
2354 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2355 free (const_cast <unsigned char *> (dst_string.text));
2357 /* Verify ranges of individual characters. This no longer includes the
2358 opening quote, but does include the closing quote. */
2359 for (int i = 0; i <= 10; i++)
2360 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2361 10 + i, 10 + i);
2363 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2366 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2367 encoding. */
2369 static void
2370 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2372 /* EBCDIC support requires iconv. */
2373 if (!HAVE_ICONV)
2374 return;
2376 /* Digits 0-9 (with 0 at column 10), the simple way.
2377 ....................000000000.11111111112.2222222223333333333
2378 ....................123456789.01234567890.1234567890123456789
2379 We add a trailing comment to ensure that we correctly locate
2380 the end of the string literal token. */
2381 const char *content = " \"0123456789\" /* not a string */\n";
2382 ebcdic_execution_charset use_ebcdic;
2383 lexer_test test (case_, content, &use_ebcdic);
2385 /* Verify that we get the expected token back, with the correct
2386 location information. */
2387 const cpp_token *tok = test.get_token ();
2388 ASSERT_EQ (tok->type, CPP_STRING);
2389 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2390 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2392 /* At this point in lexing, the quote characters are treated as part of
2393 the string (they are stripped off by cpp_interpret_string). */
2395 ASSERT_EQ (tok->val.str.len, 12);
2397 /* The remainder of the test requires an iconv implementation that
2398 can convert from UTF-8 to the EBCDIC encoding requested above. */
2399 if (use_ebcdic.iconv_errors_occurred_p ())
2400 return;
2402 /* Verify that cpp_interpret_string works. */
2403 cpp_string dst_string;
2404 const enum cpp_ttype type = CPP_STRING;
2405 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2406 &dst_string, type);
2407 ASSERT_TRUE (result);
2408 /* We should now have EBCDIC-encoded text, specifically
2409 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2410 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2411 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2412 (const char *)dst_string.text);
2413 free (const_cast <unsigned char *> (dst_string.text));
2415 /* Verify that we don't attempt to record substring location information
2416 for such cases. */
2417 ASSERT_HAS_NO_SUBSTRING_RANGES
2418 (test, tok->src_loc, type,
2419 "execution character set != source character set");
2422 /* Lex a string literal containing a hex-escaped character.
2423 Verify the substring location data, before and after running
2424 cpp_interpret_string on it. */
2426 static void
2427 test_lexer_string_locations_hex (const line_table_case &case_)
2429 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2430 and with a space in place of digit 6, to terminate the escaped
2431 hex code.
2432 ....................000000000.111111.11112222.
2433 ....................123456789.012345.67890123. */
2434 const char *content = " \"01234\\x35 789\"\n";
2435 lexer_test test (case_, content, NULL);
2437 /* Verify that we get the expected token back, with the correct
2438 location information. */
2439 const cpp_token *tok = test.get_token ();
2440 ASSERT_EQ (tok->type, CPP_STRING);
2441 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2442 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2444 /* At this point in lexing, the quote characters are treated as part of
2445 the string (they are stripped off by cpp_interpret_string). */
2446 ASSERT_EQ (tok->val.str.len, 15);
2448 /* Verify that cpp_interpret_string works. */
2449 cpp_string dst_string;
2450 const enum cpp_ttype type = CPP_STRING;
2451 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2452 &dst_string, type);
2453 ASSERT_TRUE (result);
2454 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2455 free (const_cast <unsigned char *> (dst_string.text));
2457 /* Verify ranges of individual characters. This no longer includes the
2458 opening quote, but does include the closing quote. */
2459 for (int i = 0; i <= 4; i++)
2460 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2461 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2462 for (int i = 6; i <= 10; i++)
2463 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2465 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2468 /* Lex a string literal containing an octal-escaped character.
2469 Verify the substring location data after running cpp_interpret_string
2470 on it. */
2472 static void
2473 test_lexer_string_locations_oct (const line_table_case &case_)
2475 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2476 and with a space in place of digit 6, to terminate the escaped
2477 octal code.
2478 ....................000000000.111111.11112222.2222223333333333444
2479 ....................123456789.012345.67890123.4567890123456789012 */
2480 const char *content = " \"01234\\065 789\" /* not a string */\n";
2481 lexer_test test (case_, content, NULL);
2483 /* Verify that we get the expected token back, with the correct
2484 location information. */
2485 const cpp_token *tok = test.get_token ();
2486 ASSERT_EQ (tok->type, CPP_STRING);
2487 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2489 /* Verify that cpp_interpret_string works. */
2490 cpp_string dst_string;
2491 const enum cpp_ttype type = CPP_STRING;
2492 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2493 &dst_string, type);
2494 ASSERT_TRUE (result);
2495 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2496 free (const_cast <unsigned char *> (dst_string.text));
2498 /* Verify ranges of individual characters. This no longer includes the
2499 opening quote, but does include the closing quote. */
2500 for (int i = 0; i < 5; i++)
2501 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2502 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2503 for (int i = 6; i <= 10; i++)
2504 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2506 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2509 /* Test of string literal containing letter escapes. */
2511 static void
2512 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2514 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2515 .....................000000000.1.11111.1.1.11222.22222223333333
2516 .....................123456789.0.12345.6.7.89012.34567890123456. */
2517 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2518 lexer_test test (case_, content, NULL);
2520 /* Verify that we get the expected tokens back. */
2521 const cpp_token *tok = test.get_token ();
2522 ASSERT_EQ (tok->type, CPP_STRING);
2523 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2525 /* Verify ranges of individual characters. */
2526 /* "\t". */
2527 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2528 0, 1, 10, 11);
2529 /* "foo". */
2530 for (int i = 1; i <= 3; i++)
2531 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2532 i, 1, 11 + i, 11 + i);
2533 /* "\\" and "\n". */
2534 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2535 4, 1, 15, 16);
2536 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2537 5, 1, 17, 18);
2539 /* "bar" and closing quote for nul-terminator. */
2540 for (int i = 6; i <= 9; i++)
2541 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2542 i, 1, 13 + i, 13 + i);
2544 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2547 /* Another test of a string literal containing a letter escape.
2548 Based on string seen in
2549 printf ("%-%\n");
2550 in gcc.dg/format/c90-printf-1.c. */
2552 static void
2553 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2555 /* .....................000000000.1111.11.1111.22222222223.
2556 .....................123456789.0123.45.6789.01234567890. */
2557 const char *content = (" \"%-%\\n\" /* non-str */\n");
2558 lexer_test test (case_, content, NULL);
2560 /* Verify that we get the expected tokens back. */
2561 const cpp_token *tok = test.get_token ();
2562 ASSERT_EQ (tok->type, CPP_STRING);
2563 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2565 /* Verify ranges of individual characters. */
2566 /* "%-%". */
2567 for (int i = 0; i < 3; i++)
2568 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2569 i, 1, 10 + i, 10 + i);
2570 /* "\n". */
2571 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2572 3, 1, 13, 14);
2574 /* Closing quote for nul-terminator. */
2575 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2576 4, 1, 15, 15);
2578 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2581 /* Lex a string literal containing UCN 4 characters.
2582 Verify the substring location data after running cpp_interpret_string
2583 on it. */
2585 static void
2586 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2588 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2589 as UCN 4.
2590 ....................000000000.111111.111122.222222223.33333333344444
2591 ....................123456789.012345.678901.234567890.12345678901234 */
2592 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2593 lexer_test test (case_, content, NULL);
2595 /* Verify that we get the expected token back, with the correct
2596 location information. */
2597 const cpp_token *tok = test.get_token ();
2598 ASSERT_EQ (tok->type, CPP_STRING);
2599 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2601 /* Verify that cpp_interpret_string works.
2602 The string should be encoded in the execution character
2603 set. Assuming that that is UTF-8, we should have the following:
2604 ----------- ---- ----- ------- ----------------
2605 Byte offset Byte Octal Unicode Source Column(s)
2606 ----------- ---- ----- ------- ----------------
2607 0 0x30 '0' 10
2608 1 0x31 '1' 11
2609 2 0x32 '2' 12
2610 3 0x33 '3' 13
2611 4 0x34 '4' 14
2612 5 0xE2 \342 U+2174 15-20
2613 6 0x85 \205 (cont) 15-20
2614 7 0xB4 \264 (cont) 15-20
2615 8 0xE2 \342 U+2175 21-26
2616 9 0x85 \205 (cont) 21-26
2617 10 0xB5 \265 (cont) 21-26
2618 11 0x37 '7' 27
2619 12 0x38 '8' 28
2620 13 0x39 '9' 29
2621 14 0x00 30 (closing quote)
2622 ----------- ---- ----- ------- ---------------. */
2624 cpp_string dst_string;
2625 const enum cpp_ttype type = CPP_STRING;
2626 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2627 &dst_string, type);
2628 ASSERT_TRUE (result);
2629 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2630 (const char *)dst_string.text);
2631 free (const_cast <unsigned char *> (dst_string.text));
2633 /* Verify ranges of individual characters. This no longer includes the
2634 opening quote, but does include the closing quote.
2635 '01234'. */
2636 for (int i = 0; i <= 4; i++)
2637 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2638 /* U+2174. */
2639 for (int i = 5; i <= 7; i++)
2640 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2641 /* U+2175. */
2642 for (int i = 8; i <= 10; i++)
2643 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2644 /* '789' and nul terminator */
2645 for (int i = 11; i <= 14; i++)
2646 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2648 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2651 /* Lex a string literal containing UCN 8 characters.
2652 Verify the substring location data after running cpp_interpret_string
2653 on it. */
2655 static void
2656 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2658 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2659 ....................000000000.111111.1111222222.2222333333333.344444
2660 ....................123456789.012345.6789012345.6789012345678.901234 */
2661 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2662 lexer_test test (case_, content, NULL);
2664 /* Verify that we get the expected token back, with the correct
2665 location information. */
2666 const cpp_token *tok = test.get_token ();
2667 ASSERT_EQ (tok->type, CPP_STRING);
2668 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2669 "\"01234\\U00002174\\U00002175789\"");
2671 /* Verify that cpp_interpret_string works.
2672 The UTF-8 encoding of the string is identical to that from
2673 the ucn4 testcase above; the only difference is the column
2674 locations. */
2675 cpp_string dst_string;
2676 const enum cpp_ttype type = CPP_STRING;
2677 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2678 &dst_string, type);
2679 ASSERT_TRUE (result);
2680 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2681 (const char *)dst_string.text);
2682 free (const_cast <unsigned char *> (dst_string.text));
2684 /* Verify ranges of individual characters. This no longer includes the
2685 opening quote, but does include the closing quote.
2686 '01234'. */
2687 for (int i = 0; i <= 4; i++)
2688 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2689 /* U+2174. */
2690 for (int i = 5; i <= 7; i++)
2691 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2692 /* U+2175. */
2693 for (int i = 8; i <= 10; i++)
2694 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2695 /* '789' at columns 35-37 */
2696 for (int i = 11; i <= 13; i++)
2697 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2698 /* Closing quote/nul-terminator at column 38. */
2699 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2701 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
/* Fetch a big-endian 32-bit value and convert to host endianness.

   PTR_BE_VALUE points at four bytes stored most-significant first.
   They are read one byte at a time and recombined with shifts, so the
   result does not depend on the byte order of the host.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *buf = (const unsigned char *)ptr_be_value;
  return (((uint32_t) buf[0] << 24)
          | ((uint32_t) buf[1] << 16)
          | ((uint32_t) buf[2] << 8)
          | (uint32_t) buf[3]);
}
2716 /* Lex a wide string literal and verify that attempts to read substring
2717 location data from it fail gracefully. */
2719 static void
2720 test_lexer_string_locations_wide_string (const line_table_case &case_)
2722 /* Digits 0-9.
2723 ....................000000000.11111111112.22222222233333
2724 ....................123456789.01234567890.12345678901234 */
2725 const char *content = " L\"0123456789\" /* non-str */\n";
2726 lexer_test test (case_, content, NULL);
2728 /* Verify that we get the expected token back, with the correct
2729 location information. */
2730 const cpp_token *tok = test.get_token ();
2731 ASSERT_EQ (tok->type, CPP_WSTRING);
2732 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2734 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2735 cpp_string dst_string;
2736 const enum cpp_ttype type = CPP_WSTRING;
2737 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2738 &dst_string, type);
2739 ASSERT_TRUE (result);
2740 /* The cpp_reader defaults to big-endian with
2741 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2742 now be encoded as UTF-32BE. */
2743 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2744 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2745 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2746 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2747 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2748 free (const_cast <unsigned char *> (dst_string.text));
2750 /* We don't yet support generating substring location information
2751 for L"" strings. */
2752 ASSERT_HAS_NO_SUBSTRING_RANGES
2753 (test, tok->src_loc, type,
2754 "execution character set != source character set");
/* Fetch a big-endian 16-bit value and convert to host endianness.

   PTR_BE_VALUE points at two bytes stored most-significant first.
   They are read one byte at a time and recombined with a shift, so the
   result does not depend on the byte order of the host.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *buf = (const unsigned char *)ptr_be_value;
  /* The operands promote to a wider type; narrow back explicitly
     rather than relying on the implicit conversion at the return.  */
  return (uint16_t) (((unsigned int) buf[0] << 8) | (unsigned int) buf[1]);
}
2766 /* Lex a u"" string literal and verify that attempts to read substring
2767 location data from it fail gracefully. */
2769 static void
2770 test_lexer_string_locations_string16 (const line_table_case &case_)
2772 /* Digits 0-9.
2773 ....................000000000.11111111112.22222222233333
2774 ....................123456789.01234567890.12345678901234 */
2775 const char *content = " u\"0123456789\" /* non-str */\n";
2776 lexer_test test (case_, content, NULL);
2778 /* Verify that we get the expected token back, with the correct
2779 location information. */
2780 const cpp_token *tok = test.get_token ();
2781 ASSERT_EQ (tok->type, CPP_STRING16);
2782 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2784 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2785 cpp_string dst_string;
2786 const enum cpp_ttype type = CPP_STRING16;
2787 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2788 &dst_string, type);
2789 ASSERT_TRUE (result);
2791 /* The cpp_reader defaults to big-endian, so dst_string should
2792 now be encoded as UTF-16BE. */
2793 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2794 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2795 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2796 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2797 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2798 free (const_cast <unsigned char *> (dst_string.text));
2800 /* We don't yet support generating substring location information
2801 for L"" strings. */
2802 ASSERT_HAS_NO_SUBSTRING_RANGES
2803 (test, tok->src_loc, type,
2804 "execution character set != source character set");
2807 /* Lex a U"" string literal and verify that attempts to read substring
2808 location data from it fail gracefully. */
2810 static void
2811 test_lexer_string_locations_string32 (const line_table_case &case_)
2813 /* Digits 0-9.
2814 ....................000000000.11111111112.22222222233333
2815 ....................123456789.01234567890.12345678901234 */
2816 const char *content = " U\"0123456789\" /* non-str */\n";
2817 lexer_test test (case_, content, NULL);
2819 /* Verify that we get the expected token back, with the correct
2820 location information. */
2821 const cpp_token *tok = test.get_token ();
2822 ASSERT_EQ (tok->type, CPP_STRING32);
2823 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2825 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2826 cpp_string dst_string;
2827 const enum cpp_ttype type = CPP_STRING32;
2828 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2829 &dst_string, type);
2830 ASSERT_TRUE (result);
2832 /* The cpp_reader defaults to big-endian, so dst_string should
2833 now be encoded as UTF-32BE. */
2834 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2835 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2836 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2837 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2838 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2839 free (const_cast <unsigned char *> (dst_string.text));
2841 /* We don't yet support generating substring location information
2842 for L"" strings. */
2843 ASSERT_HAS_NO_SUBSTRING_RANGES
2844 (test, tok->src_loc, type,
2845 "execution character set != source character set");
2848 /* Lex a u8-string literal.
2849 Verify the substring location data after running cpp_interpret_string
2850 on it. */
/* CASE_ is passed straight through to the lexer_test object below.  */
2852 static void
2853 test_lexer_string_locations_u8 (const line_table_case &case_)
2855 /* Digits 0-9.
2856 ....................000000000.11111111112.22222222233333
2857 ....................123456789.01234567890.12345678901234 */
/* NOTE(review): the range checks at the end of this function expect the
   digit '0' at column 10, but in the literal as it appears here '0' is the
   5th byte of the line.  It looks as though runs of spaces were collapsed
   in this listing -- confirm against the upstream file.  */
2858 const char *content = " u8\"0123456789\" /* non-str */\n";
2859 lexer_test test (case_, content, NULL);
2861 /* Verify that we get the expected token back, with the correct
2862 location information. */
2863 const cpp_token *tok = test.get_token ();
2864 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2865 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2867 /* Verify that cpp_interpret_string works. */
/* NOTE(review): the token was just asserted to be CPP_UTF8STRING, yet TYPE
   is CPP_STRING and is what every later call uses -- confirm that this is
   intentional.  */
2868 cpp_string dst_string;
2869 const enum cpp_ttype type = CPP_STRING;
2870 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2871 &dst_string, type);
2872 ASSERT_TRUE (result);
2873 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2874 free (const_cast <unsigned char *> (dst_string.text));
2876 /* Verify ranges of individual characters. This no longer includes the
2877 opening quote, but does include the closing quote. */
/* Indices 0-9 are the digits at columns 10-19; index 10 is checked at
   column 20, the closing quote.  Every range is one column wide on
   line 1.  */
2878 for (int i = 0; i <= 10; i++)
2879 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2882 /* Lex a string literal containing UTF-8 source characters.
2883 Verify the substring location data after running cpp_interpret_string
2884 on it. */
/* CASE_ is passed straight through to the lexer_test object below.  */
2886 static void
2887 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2889 /* This string literal is written out to the source file as UTF-8,
2890 and is of the form "before mojibake after", where "mojibake"
2891 is written as the following four unicode code points:
2892 U+6587 CJK UNIFIED IDEOGRAPH-6587
2893 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2894 U+5316 CJK UNIFIED IDEOGRAPH-5316
2895 U+3051 HIRAGANA LETTER KE.
2896 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2897 "before" and "after" are 1 byte per unicode character.
2899 The numbering shown are "columns", which are *byte* numbers within
2900 the line, rather than unicode character numbers.
2902 .................... 000000000.1111111.
2903 .................... 123456789.0123456. */
/* NOTE(review): the range checks at the end of this function expect the
   'b' of "before" at column 10, but in the literal as it appears here it
   is the 3rd byte of the line.  It looks as though runs of spaces were
   collapsed in this listing -- confirm against the upstream file.  */
2904 const char *content = (" \"before "
2905 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2906 UTF-8: 0xE6 0x96 0x87
2907 C octal escaped UTF-8: \346\226\207
2908 "column" numbers: 17-19. */
2909 "\346\226\207"
2911 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2912 UTF-8: 0xE5 0xAD 0x97
2913 C octal escaped UTF-8: \345\255\227
2914 "column" numbers: 20-22. */
2915 "\345\255\227"
2917 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2918 UTF-8: 0xE5 0x8C 0x96
2919 C octal escaped UTF-8: \345\214\226
2920 "column" numbers: 23-25. */
2921 "\345\214\226"
2923 /* U+3051 HIRAGANA LETTER KE
2924 UTF-8: 0xE3 0x81 0x91
2925 C octal escaped UTF-8: \343\201\221
2926 "column" numbers: 26-28. */
2927 "\343\201\221"
2929 /* column numbers 29 onwards
2930 2333333.33334444444444
2931 9012345.67890123456789. */
2932 " after\" /* non-str */\n");
2933 lexer_test test (case_, content, NULL);
2935 /* Verify that we get the expected token back, with the correct
2936 location information. */
2937 const cpp_token *tok = test.get_token ();
2938 ASSERT_EQ (tok->type, CPP_STRING);
2939 ASSERT_TOKEN_AS_TEXT_EQ
2940 (test.m_parser, tok,
2941 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2943 /* Verify that cpp_interpret_string works. */
/* The expected text is the same byte sequence as the token spelling above,
   minus the enclosing quotes.  */
2944 cpp_string dst_string;
2945 const enum cpp_ttype type = CPP_STRING;
2946 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2947 &dst_string, type);
2948 ASSERT_TRUE (result);
2949 ASSERT_STREQ
2950 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2951 (const char *)dst_string.text);
2952 free (const_cast <unsigned char *> (dst_string.text));
2954 /* Verify ranges of individual characters. This no longer includes the
2955 opening quote, but does include the closing quote.
2956 Assuming that both source and execution encodings are UTF-8, we have
2957 a run of 25 octets in each, plus the NUL terminator. */
/* The 25 octets are 7 for "before ", 12 for the four 3-byte characters and
   6 for " after"; octet i is checked at column 10 + i.  */
2958 for (int i = 0; i < 25; i++)
2959 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2960 /* NUL-terminator should use the closing quote at column 35. */
2961 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
/* 25 octets plus the terminator gives 26 ranges in total.  */
2963 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
2966 /* Test of string literal concatenation. */
/* Two string tokens on consecutive lines are lexed, interpreted together
   as one string, and recorded as a concatenation; the substring ranges are
   then queried through the location of the first token.  CASE_ is passed
   straight through to the lexer_test object below.  */
2968 static void
2969 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
2971 /* Digits 0-9.
2972 .....................000000000.111111.11112222222222
2973 .....................123456789.012345.67890123456789. */
/* NOTE(review): the range checks at the end of this function expect the
   first digit of each line at column 10, but in the literals as they
   appear here it is the 3rd byte of the line.  It looks as though runs of
   spaces were collapsed in this listing -- confirm against the upstream
   file.  */
2974 const char *content = (" \"01234\" /* non-str */\n"
2975 " \"56789\" /* non-str */\n");
2976 lexer_test test (case_, content, NULL);
2978 location_t input_locs[2];
2980 /* Verify that we get the expected tokens back. */
/* Each token contributes its cpp_string to INPUT_STRINGS and its location
   to INPUT_LOCS, in lexing order.  */
2981 auto_vec <cpp_string> input_strings;
2982 const cpp_token *tok_a = test.get_token ();
2983 ASSERT_EQ (tok_a->type, CPP_STRING);
2984 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
2985 input_strings.safe_push (tok_a->val.str);
2986 input_locs[0] = tok_a->src_loc;
2988 const cpp_token *tok_b = test.get_token ();
2989 ASSERT_EQ (tok_b->type, CPP_STRING);
2990 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
2991 input_strings.safe_push (tok_b->val.str);
2992 input_locs[1] = tok_b->src_loc;
2994 /* Verify that cpp_interpret_string works. */
/* Both strings are handed over in a single call (count 2) and must come
   back joined.  */
2995 cpp_string dst_string;
2996 const enum cpp_ttype type = CPP_STRING;
2997 bool result = cpp_interpret_string (test.m_parser,
2998 input_strings.address (), 2,
2999 &dst_string, type);
3000 ASSERT_TRUE (result);
3001 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3002 free (const_cast <unsigned char *> (dst_string.text));
3004 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3005 test.m_concats.record_string_concatenation (2, input_locs);
/* All lookups below go through the location of the first token.  */
3007 location_t initial_loc = input_locs[0];
3009 /* "01234" on line 1. */
3010 for (int i = 0; i <= 4; i++)
3011 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3012 /* "56789" in line 2, plus its closing quote for the nul terminator. */
/* Indices 5-9 land on columns 10-14 of line 2 and index 10 on column 15.  */
3013 for (int i = 5; i <= 10; i++)
3014 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
/* Ten digits plus the terminator gives 11 ranges.  */
3016 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3019 /* Another test of string literal concatenation. */
/* Five two-digit string tokens, one per line, are lexed, interpreted
   together as one string, and recorded as a concatenation.  CASE_ is
   passed straight through to the lexer_test object below.  */
3021 static void
3022 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3024 /* Digits 0-9.
3025 .....................000000000.111.11111112222222
3026 .....................123456789.012.34567890123456. */
/* NOTE(review): the range checks at the end of this function expect the
   two digits of each line at columns 10 and 11, but in the literals as
   they appear here the first digit is the 3rd byte of the line.  It looks
   as though runs of spaces were collapsed in this listing -- confirm
   against the upstream file.  */
3027 const char *content = (" \"01\" /* non-str */\n"
3028 " \"23\" /* non-str */\n"
3029 " \"45\" /* non-str */\n"
3030 " \"67\" /* non-str */\n"
3031 " \"89\" /* non-str */\n");
3032 lexer_test test (case_, content, NULL);
3034 auto_vec <cpp_string> input_strings;
3035 location_t input_locs[5];
3037 /* Verify that we get the expected tokens back. */
/* Only the token type is checked here; the string and location of each
   token are collected for the calls further down.  */
3038 for (int i = 0; i < 5; i++)
3040 const cpp_token *tok = test.get_token ();
3041 ASSERT_EQ (tok->type, CPP_STRING);
3042 input_strings.safe_push (tok->val.str);
3043 input_locs[i] = tok->src_loc;
3046 /* Verify that cpp_interpret_string works. */
3047 cpp_string dst_string;
3048 const enum cpp_ttype type = CPP_STRING;
3049 bool result = cpp_interpret_string (test.m_parser,
3050 input_strings.address (), 5,
3051 &dst_string, type);
3052 ASSERT_TRUE (result);
3053 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3054 free (const_cast <unsigned char *> (dst_string.text));
3056 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3057 test.m_concats.record_string_concatenation (5, input_locs);
3059 location_t initial_loc = input_locs[0];
3061 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3062 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3063 and expect get_source_range_for_substring to fail.
3064 However, for a string concatenation test, we can have a case
3065 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3066 but subsequent strings can be after it.
3067 Attempting to detect this within assert_char_at_range
3068 would overcomplicate the logic for the common test cases, so
3069 we detect it here. */
/* The straddling case is recognised by the first location having column
   data while the last one does not.  */
3070 if (should_have_column_data_p (input_locs[0])
3071 && !should_have_column_data_p (input_locs[4]))
3073 /* Verify that get_source_range_for_substring gracefully rejects
3074 this case. */
3075 source_range actual_range;
3076 const char *err
3077 = get_source_range_for_char (test.m_parser, &test.m_concats,
3078 initial_loc, type, 0, &actual_range);
3079 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
/* Nothing further can be checked in this configuration.  */
3080 return;
/* String i (0-based) is checked on line i + 1; its two characters are
   indices 2 * i and 2 * i + 1 of the result, at columns 10 and 11.  */
3083 for (int i = 0; i < 5; i++)
3084 for (int j = 0; j < 2; j++)
3085 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3086 i + 1, 10 + j, 10 + j);
3088 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3089 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
/* Ten digits plus the terminator gives 11 ranges.  */
3091 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3094 /* Another test of string literal concatenation, this time combined with
3095 various kinds of escaped characters. */
/* Four string tokens on one line are lexed, interpreted together as one
   string, and recorded as a concatenation.  CASE_ is passed straight
   through to the lexer_test object below.  */
3097 static void
3098 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3100 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3101 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
/* NOTE(review): the range checks at the end of this function expect the
   digit '0' at column 10, but in the literal as it appears here '0' is the
   3rd byte of the line.  It looks as though runs of spaces were collapsed
   in this listing -- confirm against the upstream file.  */
3102 const char *content
3103 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3104 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3105 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3106 lexer_test test (case_, content, NULL);
3108 auto_vec <cpp_string> input_strings;
3109 location_t input_locs[4];
3111 /* Verify that we get the expected tokens back. */
/* Only the token type is checked here; the string and location of each
   token are collected for the calls further down.  */
3112 for (int i = 0; i < 4; i++)
3114 const cpp_token *tok = test.get_token ();
3115 ASSERT_EQ (tok->type, CPP_STRING);
3116 input_strings.safe_push (tok->val.str);
3117 input_locs[i] = tok->src_loc;
3120 /* Verify that cpp_interpret_string works. */
/* The two escapes must come back as the plain digits '5' and '6'.  */
3121 cpp_string dst_string;
3122 const enum cpp_ttype type = CPP_STRING;
3123 bool result = cpp_interpret_string (test.m_parser,
3124 input_strings.address (), 4,
3125 &dst_string, type);
3126 ASSERT_TRUE (result);
3127 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3128 free (const_cast <unsigned char *> (dst_string.text));
3130 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3131 test.m_concats.record_string_concatenation (4, input_locs);
3133 location_t initial_loc = input_locs[0];
/* Indices 0-4: the plain digits of the first string, one column each.  */
3135 for (int i = 0; i <= 4; i++)
3136 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
/* Indices 5 and 6: each escape sequence is checked as one four-column
   range, 19-22 and 27-30 respectively.  */
3137 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3138 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
/* Indices 7-9: the digits of the last string, at columns 35-37.  */
3139 for (int i = 7; i <= 9; i++)
3140 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3142 /* NUL-terminator should use the location of the final closing quote. */
3143 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
/* Ten characters plus the terminator gives 11 ranges.  */
3145 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3148 /* Test of string literal in a macro. */
/* The string is written in the body of a #define on line 1 and reached by
   expanding the macro on line 2.  CASE_ is passed straight through to the
   lexer_test object below.  */
3150 static void
3151 test_lexer_string_locations_macro (const line_table_case &case_)
3153 /* Digits 0-9.
3154 .....................0000000001111111111.22222222223.
3155 .....................1234567890123456789.01234567890. */
/* NOTE(review): the range checks below expect the digit '0' at column 20,
   but in the literal as it appears here '0' is the 16th byte of the line.
   It looks as though runs of spaces were collapsed in this listing --
   confirm against the upstream file.  */
3156 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3157 " MACRO");
3158 lexer_test test (case_, content, NULL);
3160 /* Verify that we get the expected tokens back. */
/* The expected sequence is CPP_PADDING, the string, then CPP_PADDING
   again.  */
3161 const cpp_token *tok = test.get_token ();
3162 ASSERT_EQ (tok->type, CPP_PADDING);
3164 tok = test.get_token ();
3165 ASSERT_EQ (tok->type, CPP_STRING);
3166 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3168 /* Verify ranges of individual characters. We ought to
3169 see columns within the macro definition. */
/* All ranges are expected on line 1, the line holding the #define.
   Indices 0-9 are checked at columns 20-29 and index 10 at column 30.  */
3170 for (int i = 0; i <= 10; i++)
3171 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3172 i, 1, 20 + i, 20 + i);
/* Ten digits plus the terminator gives 11 ranges.  */
3174 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3176 tok = test.get_token ();
3177 ASSERT_EQ (tok->type, CPP_PADDING);
3180 /* Test of stringification of a macro argument. */
/* The macro body applies # to its parameter, so expanding MACRO(foo)
   yields a string token that is not spelled out anywhere in the content.
   CASE_ is passed straight through to the lexer_test object below.  */
3182 static void
3183 test_lexer_string_locations_stringified_macro_argument
3184 (const line_table_case &case_)
3186 /* .....................000000000111111111122222222223.
3187 .....................123456789012345678901234567890. */
3188 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3189 "MACRO(foo)\n");
3190 lexer_test test (case_, content, NULL);
3192 /* Verify that we get the expected token back. */
/* A CPP_PADDING token is expected ahead of the string.  */
3193 const cpp_token *tok = test.get_token ();
3194 ASSERT_EQ (tok->type, CPP_PADDING);
3196 tok = test.get_token ();
3197 ASSERT_EQ (tok->type, CPP_STRING);
3198 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3200 /* We don't support getting the location of a stringified macro
3201 argument. Verify that it fails gracefully. */
3202 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3203 "cpp_interpret_string_1 failed");
/* Two more CPP_PADDING tokens are expected after the string.  */
3205 tok = test.get_token ();
3206 ASSERT_EQ (tok->type, CPP_PADDING);
3208 tok = test.get_token ();
3209 ASSERT_EQ (tok->type, CPP_PADDING);
3212 /* Ensure that we fail gracefully if something attempts to pass
3213 in a location that isn't a string literal token. Seen on this code:
3215 const char a[] = " %d ";
3216 __builtin_printf (a, 0.5);
3219 when c-format.c erroneously used the indicated one-character
3220 location as the format string location, leading to a read past the
3221 end of a string buffer in cpp_interpret_string_1. */
/* CASE_ is passed straight through to the lexer_test object below.  */
3223 static void
3224 test_lexer_string_locations_non_string (const line_table_case &case_)
3226 /* .....................000000000111111111122222222223.
3227 .....................123456789012345678901234567890. */
/* The content is a lone identifier, not a string literal.  */
3228 const char *content = (" a\n");
3229 lexer_test test (case_, content, NULL);
3231 /* Verify that we get the expected token back. */
3232 const cpp_token *tok = test.get_token ();
3233 ASSERT_EQ (tok->type, CPP_NAME);
3234 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3236 /* At this point, libcpp is attempting to interpret the name as a
3237 string literal, despite it not starting with a quote. We don't detect
3238 that, but we should at least fail gracefully. */
/* The location of the CPP_NAME token is deliberately queried as though it
   were a CPP_STRING.  */
3239 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3240 "cpp_interpret_string_1 failed");
3243 /* Ensure that we can read substring information for a token which
3244 starts in one linemap and ends in another. Adapted from
3245 gcc.dg/cpp/pr69985.c. */
/* CASE_ is passed straight through to the lexer_test object below.  */
3247 static void
3248 test_lexer_string_locations_long_line (const line_table_case &case_)
3250 /* .....................000000.000111111111
3251 .....................123456.789012346789. */
/* NOTE(review): the range checks below expect the first digit at column 7
   of line 2, but in the literal as it appears here it is the 3rd byte of
   that line.  It looks as though runs of spaces were collapsed in this
   listing -- confirm against the upstream file.  */
3252 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3253 " \"0123456789012345678901234567890123456789"
3254 "0123456789012345678901234567890123456789"
3255 "0123456789012345678901234567890123456789"
3256 "0123456789\"\n");
3258 lexer_test test (case_, content, NULL);
3260 /* Verify that we get the expected token back. */
3261 const cpp_token *tok = test.get_token ();
3262 ASSERT_EQ (tok->type, CPP_STRING);
/* Skip the range checks when the highest location handed out so far has
   no column data.  */
3264 if (!should_have_column_data_p (line_table->highest_location))
3265 return;
3267 /* Verify ranges of individual characters. */
/* The string holds 130 digits (40 + 40 + 40 + 10); with the terminator
   that makes 131 ranges, all on line 2 and one column wide.  */
3268 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3269 for (int i = 0; i < 131; i++)
3270 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3271 i, 2, 7 + i, 7 + i);
3274 /* Test of locations within a raw string that doesn't contain a newline. */
/* CASE_ is passed straight through to the lexer_test object below.  */
3276 static void
3277 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3279 /* .....................00.0000000111111111122.
3280 .....................12.3456789012345678901. */
/* The raw string uses the delimiter foo; its payload, the ten digits,
   occupies columns 7-16.  */
3281 const char *content = ("R\"foo(0123456789)foo\"\n");
3282 lexer_test test (case_, content, NULL);
3284 /* Verify that we get the expected token back. */
3285 const cpp_token *tok = test.get_token ();
3286 ASSERT_EQ (tok->type, CPP_STRING);
3288 /* Verify that cpp_interpret_string works. */
/* Only the payload between the delimiters is expected back.  */
3289 cpp_string dst_string;
3290 const enum cpp_ttype type = CPP_STRING;
3291 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3292 &dst_string, type);
3293 ASSERT_TRUE (result);
3294 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3295 free (const_cast <unsigned char *> (dst_string.text));
/* Skip the range checks when the highest location handed out so far has
   no column data.  */
3297 if (!should_have_column_data_p (line_table->highest_location))
3298 return;
3300 /* 0-9, plus the NUL terminator. */
/* Index 10, the terminator, is checked at column 17, which is the ')' that
   begins the closing delimiter.  */
3301 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3302 for (int i = 0; i < 11; i++)
3303 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3304 i, 1, 7 + i, 7 + i);
3307 /* Test of locations within a raw string that contains a newline. */
/* CASE_ is passed straight through to the lexer_test object below.  */
3309 static void
3310 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3312 /* .....................00.0000.
3313 .....................12.3456. */
/* The raw string opens on line 1, carries two lines of payload, and closes
   on line 4.  */
3314 const char *content = ("R\"foo(\n"
3315 /* .....................00000.
3316 .....................12345. */
3317 "hello\n"
3318 "world\n"
3319 /* .....................00000.
3320 .....................12345. */
3321 ")foo\"\n");
3322 lexer_test test (case_, content, NULL);
3324 /* Verify that we get the expected token back. */
3325 const cpp_token *tok = test.get_token ();
3326 ASSERT_EQ (tok->type, CPP_STRING);
3328 /* Verify that cpp_interpret_string works. */
/* The newline right after the opening delimiter and the one right before
   the closing delimiter are both part of the expected text.  */
3329 cpp_string dst_string;
3330 const enum cpp_ttype type = CPP_STRING;
3331 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3332 &dst_string, type);
3333 ASSERT_TRUE (result);
3334 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3335 free (const_cast <unsigned char *> (dst_string.text));
/* Skip the range check when the highest location handed out so far has no
   column data.  */
3337 if (!should_have_column_data_p (line_table->highest_location))
3338 return;
3340 /* Currently we don't support locations within raw strings that
3341 contain newlines. */
/* tok->type was asserted to be CPP_STRING above, so it carries the same
   value as TYPE here.  */
3342 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3343 "range endpoints are on different lines");
3346 /* Test of parsing an unterminated raw string. */
/* CASE_ is passed to the lexer_test object below together with an error
   sink, so that the number of reported errors can be checked.  */
3348 static void
3349 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
/* The opening delimiter is ouch but the text only offers ouCh (capital C)
   as a closer, so the raw string never ends.  */
3351 const char *content = "R\"ouch()ouCh\" /* etc */";
3353 lexer_error_sink errors;
3354 lexer_test test (case_, content, &errors);
/* The first token is expected to be CPP_EOF, asserted explicitly below;
   the flag presumably turns off an automatic end-of-file check in
   lexer_test -- confirm against its definition.  */
3355 test.m_implicitly_expect_EOF = false;
3357 /* Attempt to parse the raw string. */
3358 const cpp_token *tok = test.get_token ();
3359 ASSERT_EQ (tok->type, CPP_EOF);
/* Exactly one error must have been collected by the sink.  */
3361 ASSERT_EQ (1, errors.m_errors.length ());
/* NOTE(review): the comment that follows has no closing marker in this
   listing; the line that carried it appears to be missing -- confirm
   against the upstream file.  */
3362 /* We expect the message "unterminated raw string"
3363 in the "cpplib" translation domain.
3364 It's not clear that dgettext is available on all supported hosts,
3365 so this assertion is commented-out for now.
3366 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3367 errors.m_errors[0]);
3371 /* Test of lexing char constants. */
/* Five character constants, one per line, are lexed in turn and each
   token is checked for its type and spelling.  CASE_ is passed straight
   through to the lexer_test object below.  */
3373 static void
3374 test_lexer_char_constants (const line_table_case &case_)
3376 /* Various char constants.
3377 .....................0000000001111111111.22222222223.
3378 .....................1234567890123456789.01234567890. */
3379 const char *content = (" 'a'\n"
3380 " u'a'\n"
3381 " U'a'\n"
3382 " L'a'\n"
3383 " 'abc'\n");
3384 lexer_test test (case_, content, NULL);
3386 /* Verify that we get the expected tokens back. */
3387 /* 'a'. */
3388 const cpp_token *tok = test.get_token ();
3389 ASSERT_EQ (tok->type, CPP_CHAR);
3390 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
/* Only this first constant is also run through cpp_interpret_charconst.
   UNSIGNEDP is filled in by the call but not examined afterwards.  */
3392 unsigned int chars_seen;
3393 int unsignedp;
3394 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3395 &chars_seen, &unsignedp);
3396 ASSERT_EQ (cc, 'a');
3397 ASSERT_EQ (chars_seen, 1);
3399 /* u'a'. */
3400 tok = test.get_token ();
3401 ASSERT_EQ (tok->type, CPP_CHAR16);
3402 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3404 /* U'a'. */
3405 tok = test.get_token ();
3406 ASSERT_EQ (tok->type, CPP_CHAR32);
3407 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3409 /* L'a'. */
3410 tok = test.get_token ();
3411 ASSERT_EQ (tok->type, CPP_WCHAR);
3412 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3414 /* 'abc' (c-char-sequence). */
/* A multi-character constant is still expected to lex as CPP_CHAR.  */
3415 tok = test.get_token ();
3416 ASSERT_EQ (tok->type, CPP_CHAR);
3417 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3419 /* A table of interesting location_t values, giving one axis of our test
3420 matrix. */
/* NOTE(review): only eleven entries are visible in this listing, whereas
   for_each_line_table_case asserts 2 * 12 cases.  The entry described by
   the first comment below (presumably a literal zero) and the closing of
   the initializer appear to be missing -- confirm against the upstream
   file.  */
3422 static const location_t boundary_locations[] = {
3423 /* Zero means "don't override the default values for a new line_table". */
3426 /* An arbitrary non-zero value that isn't close to one of
3427 the boundary values below. */
3428 0x10000,
/* Each boundary below is probed at -0x100, -1, the value itself, +1 and
   +0x100.  */
3430 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3431 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3432 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3433 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3434 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3435 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3437 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3438 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3439 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3440 LINE_MAP_MAX_LOCATION_WITH_COLS,
3441 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3442 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3445 /* Run TESTCASE multiple times, once for each case in our test matrix. */
/* TESTCASE is called with a freshly constructed line_table_case for every
   pairing of a default_range_bits value with an entry of
   boundary_locations.  */
3447 void
3448 for_each_line_table_case (void (*testcase) (const line_table_case &))
3450 /* As noted above in the description of struct line_table_case,
3451 we want to explore a test matrix of interesting line_table
3452 situations, running various selftests for each case within the
3453 matrix. */
3455 /* Run all tests with:
3456 (a) line_table->default_range_bits == 0, and
3457 (b) line_table->default_range_bits == 5. */
/* Stepping by 5 from 0 up to and including 5 makes the outer loop run
   exactly twice.  */
3458 int num_cases_tested = 0;
3459 for (int default_range_bits = 0; default_range_bits <= 5;
3460 default_range_bits += 5)
3462 /* ...and use each of the "interesting" location values as
3463 the starting location within line_table. */
3464 const int num_boundary_locations
3465 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3466 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3468 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3470 testcase (c);
3472 num_cases_tested++;
3476 /* Verify that we fully covered the test matrix. */
/* The 2 is the number of default_range_bits values; the 12 is a
   hard-coded count of boundary_locations entries and has to be kept in
   step with that table by hand.  */
3477 ASSERT_EQ (num_cases_tested, 2 * 12);
3480 /* Run all of the selftests within this file. */
/* Takes no arguments and returns nothing; the tests are simply called in
   the order listed.  */
3482 void
3483 input_c_tests ()
/* These tests are called directly, once each.  */
3485 test_should_have_column_data_p ();
3486 test_unknown_location ();
3487 test_builtins ();
/* Every test below is handed to for_each_line_table_case, which calls it
   once per case in the test matrix.  */
3488 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3490 for_each_line_table_case (test_accessing_ordinary_linemaps);
3491 for_each_line_table_case (test_lexer);
3492 for_each_line_table_case (test_lexer_string_locations_simple);
3493 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3494 for_each_line_table_case (test_lexer_string_locations_hex);
3495 for_each_line_table_case (test_lexer_string_locations_oct);
3496 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3497 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3498 for_each_line_table_case (test_lexer_string_locations_ucn4);
3499 for_each_line_table_case (test_lexer_string_locations_ucn8);
3500 for_each_line_table_case (test_lexer_string_locations_wide_string);
3501 for_each_line_table_case (test_lexer_string_locations_string16);
3502 for_each_line_table_case (test_lexer_string_locations_string32);
3503 for_each_line_table_case (test_lexer_string_locations_u8);
3504 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3505 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3506 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3507 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3508 for_each_line_table_case (test_lexer_string_locations_macro);
3509 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3510 for_each_line_table_case (test_lexer_string_locations_non_string);
3511 for_each_line_table_case (test_lexer_string_locations_long_line);
3512 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3513 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3514 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3515 for_each_line_table_case (test_lexer_char_constants);
/* The last test is again called directly.  */
3517 test_reading_source_line ();
3520 } // namespace selftest
3522 #endif /* CHECKING_P */