compiler: record final type for numeric expressions
[official-gcc.git] / gcc / input.c
blob6ce9782d3a8f7b458c9b775084fb75f297ef6a6e
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2018 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic.h"
25 #include "diagnostic-core.h"
26 #include "selftest.h"
27 #include "cpplib.h"
29 #ifndef HAVE_ICONV
30 #define HAVE_ICONV 0
31 #endif
33 /* This is a cache used by get_next_line to store the content of a
34 file to be searched for file lines. */
35 struct fcache
37 /* These are information used to store a line boundary. */
38 struct line_info
40 /* The line number. It starts from 1. */
41 size_t line_num;
43 /* The position (byte count) of the beginning of the line,
44 relative to the file data pointer. This starts at zero. */
45 size_t start_pos;
47 /* The position (byte count) of the last byte of the line. This
48 normally points to the '\n' character, or to one byte after the
49 last byte of the file, if the file doesn't contain a '\n'
50 character. */
51 size_t end_pos;
53 line_info (size_t l, size_t s, size_t e)
54 : line_num (l), start_pos (s), end_pos (e)
57 line_info ()
58 :line_num (0), start_pos (0), end_pos (0)
62 /* The number of time this file has been accessed. This is used
63 to designate which file cache to evict from the cache
64 array. */
65 unsigned use_count;
67 /* The file_path is the key for identifying a particular file in
68 the cache.
69 For libcpp-using code, the underlying buffer for this field is
70 owned by the corresponding _cpp_file within the cpp_reader. */
71 const char *file_path;
73 FILE *fp;
75 /* This points to the content of the file that we've read so
76 far. */
77 char *data;
79 /* The size of the DATA array above.*/
80 size_t size;
82 /* The number of bytes read from the underlying file so far. This
83 must be less (or equal) than SIZE above. */
84 size_t nb_read;
86 /* The index of the beginning of the current line. */
87 size_t line_start_idx;
89 /* The number of the previous line read. This starts at 1. Zero
90 means we've read no line so far. */
91 size_t line_num;
93 /* This is the total number of lines of the current file. At the
94 moment, we try to get this information from the line map
95 subsystem. Note that this is just a hint. When using the C++
96 front-end, this hint is correct because the input file is then
97 completely tokenized before parsing starts; so the line map knows
98 the number of lines before compilation really starts. For e.g,
99 the C front-end, it can happen that we start emitting diagnostics
100 before the line map has seen the end of the file. */
101 size_t total_lines;
103 /* Could this file be missing a trailing newline on its final line?
104 Initially true (to cope with empty files), set to true/false
105 as each line is read. */
106 bool missing_trailing_newline;
108 /* This is a record of the beginning and end of the lines we've seen
109 while reading the file. This is useful to avoid walking the data
110 from the beginning when we are asked to read a line that is
111 before LINE_START_IDX above. Note that the maximum size of this
112 record is fcache_line_record_size, so that the memory consumption
113 doesn't explode. We thus scale total_lines down to
114 fcache_line_record_size. */
115 vec<line_info, va_heap> line_record;
117 fcache ();
118 ~fcache ();
121 /* Current position in real source file. */
123 location_t input_location = UNKNOWN_LOCATION;
125 struct line_maps *line_table;
127 /* A stashed copy of "line_table" for use by selftest::line_table_test.
128 This needs to be a global so that it can be a GC root, and thus
129 prevent the stashed copy from being garbage-collected if the GC runs
130 during a line_table_test. */
132 struct line_maps *saved_line_table;
134 static fcache *fcache_tab;
135 static const size_t fcache_tab_size = 16;
136 static const size_t fcache_buffer_size = 4 * 1024;
137 static const size_t fcache_line_record_size = 100;
139 /* Expand the source location LOC into a human readable location. If
140 LOC resolves to a builtin location, the file name of the readable
141 location is set to the string "<built-in>". If EXPANSION_POINT_P is
142 TRUE and LOC is virtual, then it is resolved to the expansion
143 point of the involved macro. Otherwise, it is resolved to the
144 spelling location of the token.
146 When resolving to the spelling location of the token, if the
147 resulting location is for a built-in location (that is, it has no
148 associated line/column) in the context of a macro expansion, the
149 returned location is the first one (while unwinding the macro
150 location towards its expansion point) that is in real source
151 code.
153 ASPECT controls which part of the location to use. */
155 static expanded_location
156 expand_location_1 (location_t loc,
157 bool expansion_point_p,
158 enum location_aspect aspect)
160 expanded_location xloc;
161 const line_map_ordinary *map;
162 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
163 tree block = NULL;
165 if (IS_ADHOC_LOC (loc))
167 block = LOCATION_BLOCK (loc);
168 loc = LOCATION_LOCUS (loc);
171 memset (&xloc, 0, sizeof (xloc));
173 if (loc >= RESERVED_LOCATION_COUNT)
175 if (!expansion_point_p)
177 /* We want to resolve LOC to its spelling location.
179 But if that spelling location is a reserved location that
180 appears in the context of a macro expansion (like for a
181 location for a built-in token), let's consider the first
182 location (toward the expansion point) that is not reserved;
183 that is, the first location that is in real source code. */
184 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
185 loc, NULL);
186 lrk = LRK_SPELLING_LOCATION;
188 loc = linemap_resolve_location (line_table, loc, lrk, &map);
190 /* loc is now either in an ordinary map, or is a reserved location.
191 If it is a compound location, the caret is in a spelling location,
192 but the start/finish might still be a virtual location.
193 Depending of what the caller asked for, we may need to recurse
194 one level in order to resolve any virtual locations in the
195 end-points. */
196 switch (aspect)
198 default:
199 gcc_unreachable ();
200 /* Fall through. */
201 case LOCATION_ASPECT_CARET:
202 break;
203 case LOCATION_ASPECT_START:
205 location_t start = get_start (loc);
206 if (start != loc)
207 return expand_location_1 (start, expansion_point_p, aspect);
209 break;
210 case LOCATION_ASPECT_FINISH:
212 location_t finish = get_finish (loc);
213 if (finish != loc)
214 return expand_location_1 (finish, expansion_point_p, aspect);
216 break;
218 xloc = linemap_expand_location (line_table, map, loc);
221 xloc.data = block;
222 if (loc <= BUILTINS_LOCATION)
223 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
225 return xloc;
228 /* Initialize the set of cache used for files accessed by caret
229 diagnostic. */
231 static void
232 diagnostic_file_cache_init (void)
234 if (fcache_tab == NULL)
235 fcache_tab = new fcache[fcache_tab_size];
238 /* Free the resources used by the set of cache used for files accessed
239 by caret diagnostic. */
241 void
242 diagnostic_file_cache_fini (void)
244 if (fcache_tab)
246 delete [] (fcache_tab);
247 fcache_tab = NULL;
251 /* Return the total lines number that have been read so far by the
252 line map (in the preprocessor) so far. For languages like C++ that
253 entirely preprocess the input file before starting to parse, this
254 equals the actual number of lines of the file. */
256 static size_t
257 total_lines_num (const char *file_path)
259 size_t r = 0;
260 location_t l = 0;
261 if (linemap_get_file_highest_location (line_table, file_path, &l))
263 gcc_assert (l >= RESERVED_LOCATION_COUNT);
264 expanded_location xloc = expand_location (l);
265 r = xloc.line;
267 return r;
270 /* Lookup the cache used for the content of a given file accessed by
271 caret diagnostic. Return the found cached file, or NULL if no
272 cached file was found. */
274 static fcache*
275 lookup_file_in_cache_tab (const char *file_path)
277 if (file_path == NULL)
278 return NULL;
280 diagnostic_file_cache_init ();
282 /* This will contain the found cached file. */
283 fcache *r = NULL;
284 for (unsigned i = 0; i < fcache_tab_size; ++i)
286 fcache *c = &fcache_tab[i];
287 if (c->file_path && !strcmp (c->file_path, file_path))
289 ++c->use_count;
290 r = c;
294 if (r)
295 ++r->use_count;
297 return r;
300 /* Purge any mention of FILENAME from the cache of files used for
301 printing source code. For use in selftests when working
302 with tempfiles. */
304 void
305 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
307 gcc_assert (file_path);
309 fcache *r = lookup_file_in_cache_tab (file_path);
310 if (!r)
311 /* Not found. */
312 return;
314 r->file_path = NULL;
315 if (r->fp)
316 fclose (r->fp);
317 r->fp = NULL;
318 r->nb_read = 0;
319 r->line_start_idx = 0;
320 r->line_num = 0;
321 r->line_record.truncate (0);
322 r->use_count = 0;
323 r->total_lines = 0;
324 r->missing_trailing_newline = true;
327 /* Return the file cache that has been less used, recently, or the
328 first empty one. If HIGHEST_USE_COUNT is non-null,
329 *HIGHEST_USE_COUNT is set to the highest use count of the entries
330 in the cache table. */
332 static fcache*
333 evicted_cache_tab_entry (unsigned *highest_use_count)
335 diagnostic_file_cache_init ();
337 fcache *to_evict = &fcache_tab[0];
338 unsigned huc = to_evict->use_count;
339 for (unsigned i = 1; i < fcache_tab_size; ++i)
341 fcache *c = &fcache_tab[i];
342 bool c_is_empty = (c->file_path == NULL);
344 if (c->use_count < to_evict->use_count
345 || (to_evict->file_path && c_is_empty))
346 /* We evict C because it's either an entry with a lower use
347 count or one that is empty. */
348 to_evict = c;
350 if (huc < c->use_count)
351 huc = c->use_count;
353 if (c_is_empty)
354 /* We've reached the end of the cache; subsequent elements are
355 all empty. */
356 break;
359 if (highest_use_count)
360 *highest_use_count = huc;
362 return to_evict;
365 /* Create the cache used for the content of a given file to be
366 accessed by caret diagnostic. This cache is added to an array of
367 cache and can be retrieved by lookup_file_in_cache_tab. This
368 function returns the created cache. Note that only the last
369 fcache_tab_size files are cached. */
371 static fcache*
372 add_file_to_cache_tab (const char *file_path)
375 FILE *fp = fopen (file_path, "r");
376 if (fp == NULL)
377 return NULL;
379 unsigned highest_use_count = 0;
380 fcache *r = evicted_cache_tab_entry (&highest_use_count);
381 r->file_path = file_path;
382 if (r->fp)
383 fclose (r->fp);
384 r->fp = fp;
385 r->nb_read = 0;
386 r->line_start_idx = 0;
387 r->line_num = 0;
388 r->line_record.truncate (0);
389 /* Ensure that this cache entry doesn't get evicted next time
390 add_file_to_cache_tab is called. */
391 r->use_count = ++highest_use_count;
392 r->total_lines = total_lines_num (file_path);
393 r->missing_trailing_newline = true;
395 return r;
398 /* Lookup the cache used for the content of a given file accessed by
399 caret diagnostic. If no cached file was found, create a new cache
400 for this file, add it to the array of cached file and return
401 it. */
403 static fcache*
404 lookup_or_add_file_to_cache_tab (const char *file_path)
406 fcache *r = lookup_file_in_cache_tab (file_path);
407 if (r == NULL)
408 r = add_file_to_cache_tab (file_path);
409 return r;
412 /* Default constructor for a cache of file used by caret
413 diagnostic. */
415 fcache::fcache ()
416 : use_count (0), file_path (NULL), fp (NULL), data (0),
417 size (0), nb_read (0), line_start_idx (0), line_num (0),
418 total_lines (0), missing_trailing_newline (true)
420 line_record.create (0);
423 /* Destructor for a cache of file used by caret diagnostic. */
425 fcache::~fcache ()
427 if (fp)
429 fclose (fp);
430 fp = NULL;
432 if (data)
434 XDELETEVEC (data);
435 data = 0;
437 line_record.release ();
440 /* Returns TRUE iff the cache would need to be filled with data coming
441 from the file. That is, either the cache is empty or full or the
442 current line is empty. Note that if the cache is full, it would
443 need to be extended and filled again. */
445 static bool
446 needs_read (fcache *c)
448 return (c->nb_read == 0
449 || c->nb_read == c->size
450 || (c->line_start_idx >= c->nb_read - 1));
453 /* Return TRUE iff the cache is full and thus needs to be
454 extended. */
456 static bool
457 needs_grow (fcache *c)
459 return c->nb_read == c->size;
462 /* Grow the cache if it needs to be extended. */
464 static void
465 maybe_grow (fcache *c)
467 if (!needs_grow (c))
468 return;
470 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
471 c->data = XRESIZEVEC (char, c->data, size);
472 c->size = size;
475 /* Read more data into the cache. Extends the cache if need be.
476 Returns TRUE iff new data could be read. */
478 static bool
479 read_data (fcache *c)
481 if (feof (c->fp) || ferror (c->fp))
482 return false;
484 maybe_grow (c);
486 char * from = c->data + c->nb_read;
487 size_t to_read = c->size - c->nb_read;
488 size_t nb_read = fread (from, 1, to_read, c->fp);
490 if (ferror (c->fp))
491 return false;
493 c->nb_read += nb_read;
494 return !!nb_read;
497 /* Read new data iff the cache needs to be filled with more data
498 coming from the file FP. Return TRUE iff the cache was filled with
499 mode data. */
501 static bool
502 maybe_read_data (fcache *c)
504 if (!needs_read (c))
505 return false;
506 return read_data (c);
509 /* Read a new line from file FP, using C as a cache for the data
510 coming from the file. Upon successful completion, *LINE is set to
511 the beginning of the line found. *LINE points directly in the
512 line cache and is only valid until the next call of get_next_line.
513 *LINE_LEN is set to the length of the line. Note that the line
514 does not contain any terminal delimiter. This function returns
515 true if some data was read or process from the cache, false
516 otherwise. Note that subsequent calls to get_next_line might
517 make the content of *LINE invalid. */
519 static bool
520 get_next_line (fcache *c, char **line, ssize_t *line_len)
522 /* Fill the cache with data to process. */
523 maybe_read_data (c);
525 size_t remaining_size = c->nb_read - c->line_start_idx;
526 if (remaining_size == 0)
527 /* There is no more data to process. */
528 return false;
530 char *line_start = c->data + c->line_start_idx;
532 char *next_line_start = NULL;
533 size_t len = 0;
534 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
535 if (line_end == NULL)
537 /* We haven't found the end-of-line delimiter in the cache.
538 Fill the cache with more data from the file and look for the
539 '\n'. */
540 while (maybe_read_data (c))
542 line_start = c->data + c->line_start_idx;
543 remaining_size = c->nb_read - c->line_start_idx;
544 line_end = (char *) memchr (line_start, '\n', remaining_size);
545 if (line_end != NULL)
547 next_line_start = line_end + 1;
548 break;
551 if (line_end == NULL)
553 /* We've loadded all the file into the cache and still no
554 '\n'. Let's say the line ends up at one byte passed the
555 end of the file. This is to stay consistent with the case
556 of when the line ends up with a '\n' and line_end points to
557 that terminal '\n'. That consistency is useful below in
558 the len calculation. */
559 line_end = c->data + c->nb_read ;
560 c->missing_trailing_newline = true;
562 else
563 c->missing_trailing_newline = false;
565 else
567 next_line_start = line_end + 1;
568 c->missing_trailing_newline = false;
571 if (ferror (c->fp))
572 return false;
574 /* At this point, we've found the end of the of line. It either
575 points to the '\n' or to one byte after the last byte of the
576 file. */
577 gcc_assert (line_end != NULL);
579 len = line_end - line_start;
581 if (c->line_start_idx < c->nb_read)
582 *line = line_start;
584 ++c->line_num;
586 /* Before we update our line record, make sure the hint about the
587 total number of lines of the file is correct. If it's not, then
588 we give up recording line boundaries from now on. */
589 bool update_line_record = true;
590 if (c->line_num > c->total_lines)
591 update_line_record = false;
593 /* Now update our line record so that re-reading lines from the
594 before c->line_start_idx is faster. */
595 if (update_line_record
596 && c->line_record.length () < fcache_line_record_size)
598 /* If the file lines fits in the line record, we just record all
599 its lines ...*/
600 if (c->total_lines <= fcache_line_record_size
601 && c->line_num > c->line_record.length ())
602 c->line_record.safe_push (fcache::line_info (c->line_num,
603 c->line_start_idx,
604 line_end - c->data));
605 else if (c->total_lines > fcache_line_record_size)
607 /* ... otherwise, we just scale total_lines down to
608 (fcache_line_record_size lines. */
609 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
610 if (c->line_record.length () == 0
611 || n >= c->line_record.length ())
612 c->line_record.safe_push (fcache::line_info (c->line_num,
613 c->line_start_idx,
614 line_end - c->data));
618 /* Update c->line_start_idx so that it points to the next line to be
619 read. */
620 if (next_line_start)
621 c->line_start_idx = next_line_start - c->data;
622 else
623 /* We didn't find any terminal '\n'. Let's consider that the end
624 of line is the end of the data in the cache. The next
625 invocation of get_next_line will either read more data from the
626 underlying file or return false early because we've reached the
627 end of the file. */
628 c->line_start_idx = c->nb_read;
630 *line_len = len;
632 return true;
635 /* Consume the next bytes coming from the cache (or from its
636 underlying file if there are remaining unread bytes in the file)
637 until we reach the next end-of-line (or end-of-file). There is no
638 copying from the cache involved. Return TRUE upon successful
639 completion. */
641 static bool
642 goto_next_line (fcache *cache)
644 char *l;
645 ssize_t len;
647 return get_next_line (cache, &l, &len);
650 /* Read an arbitrary line number LINE_NUM from the file cached in C.
651 If the line was read successfully, *LINE points to the beginning
652 of the line in the file cache and *LINE_LEN is the length of the
653 line. *LINE is not nul-terminated, but may contain zero bytes.
654 *LINE is only valid until the next call of read_line_num.
655 This function returns bool if a line was read. */
657 static bool
658 read_line_num (fcache *c, size_t line_num,
659 char **line, ssize_t *line_len)
661 gcc_assert (line_num > 0);
663 if (line_num <= c->line_num)
665 /* We've been asked to read lines that are before c->line_num.
666 So lets use our line record (if it's not empty) to try to
667 avoid re-reading the file from the beginning again. */
669 if (c->line_record.is_empty ())
671 c->line_start_idx = 0;
672 c->line_num = 0;
674 else
676 fcache::line_info *i = NULL;
677 if (c->total_lines <= fcache_line_record_size)
679 /* In languages where the input file is not totally
680 preprocessed up front, the c->total_lines hint
681 can be smaller than the number of lines of the
682 file. In that case, only the first
683 c->total_lines have been recorded.
685 Otherwise, the first c->total_lines we've read have
686 their start/end recorded here. */
687 i = (line_num <= c->total_lines)
688 ? &c->line_record[line_num - 1]
689 : &c->line_record[c->total_lines - 1];
690 gcc_assert (i->line_num <= line_num);
692 else
694 /* So the file had more lines than our line record
695 size. Thus the number of lines we've recorded has
696 been scaled down to fcache_line_reacord_size. Let's
697 pick the start/end of the recorded line that is
698 closest to line_num. */
699 size_t n = (line_num <= c->total_lines)
700 ? line_num * fcache_line_record_size / c->total_lines
701 : c ->line_record.length () - 1;
702 if (n < c->line_record.length ())
704 i = &c->line_record[n];
705 gcc_assert (i->line_num <= line_num);
709 if (i && i->line_num == line_num)
711 /* We have the start/end of the line. */
712 *line = c->data + i->start_pos;
713 *line_len = i->end_pos - i->start_pos;
714 return true;
717 if (i)
719 c->line_start_idx = i->start_pos;
720 c->line_num = i->line_num - 1;
722 else
724 c->line_start_idx = 0;
725 c->line_num = 0;
730 /* Let's walk from line c->line_num up to line_num - 1, without
731 copying any line. */
732 while (c->line_num < line_num - 1)
733 if (!goto_next_line (c))
734 return false;
736 /* The line we want is the next one. Let's read and copy it back to
737 the caller. */
738 return get_next_line (c, line, line_len);
741 /* Return the physical source line that corresponds to FILE_PATH/LINE.
742 The line is not nul-terminated. The returned pointer is only
743 valid until the next call of location_get_source_line.
744 Note that the line can contain several null characters,
745 so the returned value's length has the actual length of the line.
746 If the function fails, a NULL char_span is returned. */
748 char_span
749 location_get_source_line (const char *file_path, int line)
751 char *buffer = NULL;
752 ssize_t len;
754 if (line == 0)
755 return char_span (NULL, 0);
757 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
758 if (c == NULL)
759 return char_span (NULL, 0);
761 bool read = read_line_num (c, line, &buffer, &len);
762 if (!read)
763 return char_span (NULL, 0);
765 return char_span (buffer, len);
768 /* Determine if FILE_PATH missing a trailing newline on its final line.
769 Only valid to call once all of the file has been loaded, by
770 requesting a line number beyond the end of the file. */
772 bool
773 location_missing_trailing_newline (const char *file_path)
775 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
776 if (c == NULL)
777 return false;
779 return c->missing_trailing_newline;
782 /* Test if the location originates from the spelling location of a
783 builtin-tokens. That is, return TRUE if LOC is a (possibly
784 virtual) location of a built-in token that appears in the expansion
785 list of a macro. Please note that this function also works on
786 tokens that result from built-in tokens. For instance, the
787 function would return true if passed a token "4" that is the result
788 of the expansion of the built-in __LINE__ macro. */
789 bool
790 is_location_from_builtin_token (location_t loc)
792 const line_map_ordinary *map = NULL;
793 loc = linemap_resolve_location (line_table, loc,
794 LRK_SPELLING_LOCATION, &map);
795 return loc == BUILTINS_LOCATION;
798 /* Expand the source location LOC into a human readable location. If
799 LOC is virtual, it resolves to the expansion point of the involved
800 macro. If LOC resolves to a builtin location, the file name of the
801 readable location is set to the string "<built-in>". */
803 expanded_location
804 expand_location (location_t loc)
806 return expand_location_1 (loc, /*expansion_point_p=*/true,
807 LOCATION_ASPECT_CARET);
810 /* Expand the source location LOC into a human readable location. If
811 LOC is virtual, it resolves to the expansion location of the
812 relevant macro. If LOC resolves to a builtin location, the file
813 name of the readable location is set to the string
814 "<built-in>". */
816 expanded_location
817 expand_location_to_spelling_point (location_t loc,
818 enum location_aspect aspect)
820 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
823 /* The rich_location class within libcpp requires a way to expand
824 location_t instances, and relies on the client code
825 providing a symbol named
826 linemap_client_expand_location_to_spelling_point
827 to do this.
829 This is the implementation for libcommon.a (all host binaries),
830 which simply calls into expand_location_1. */
832 expanded_location
833 linemap_client_expand_location_to_spelling_point (location_t loc,
834 enum location_aspect aspect)
836 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
840 /* If LOCATION is in a system header and if it is a virtual location for
841 a token coming from the expansion of a macro, unwind it to the
842 location of the expansion point of the macro. Otherwise, just return
843 LOCATION.
845 This is used for instance when we want to emit diagnostics about a
846 token that may be located in a macro that is itself defined in a
847 system header, for example, for the NULL macro. In such a case, if
848 LOCATION were passed directly to diagnostic functions such as
849 warning_at, the diagnostic would be suppressed (unless
850 -Wsystem-headers). */
852 location_t
853 expansion_point_location_if_in_system_header (location_t location)
855 if (in_system_header_at (location))
856 location = linemap_resolve_location (line_table, location,
857 LRK_MACRO_EXPANSION_POINT,
858 NULL);
859 return location;
862 /* If LOCATION is a virtual location for a token coming from the expansion
863 of a macro, unwind to the location of the expansion point of the macro. */
865 location_t
866 expansion_point_location (location_t location)
868 return linemap_resolve_location (line_table, location,
869 LRK_MACRO_EXPANSION_POINT, NULL);
872 /* Construct a location with caret at CARET, ranging from START to
873 finish e.g.
875 11111111112
876 12345678901234567890
878 523 return foo + bar;
879 ~~~~^~~~~
882 The location's caret is at the "+", line 523 column 15, but starts
883 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
884 of "bar" at column 19. */
886 location_t
887 make_location (location_t caret, location_t start, location_t finish)
889 location_t pure_loc = get_pure_location (caret);
890 source_range src_range;
891 src_range.m_start = get_start (start);
892 src_range.m_finish = get_finish (finish);
893 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
894 pure_loc,
895 src_range,
896 NULL);
897 return combined_loc;
900 /* Same as above, but taking a source range rather than two locations. */
902 location_t
903 make_location (location_t caret, source_range src_range)
905 location_t pure_loc = get_pure_location (caret);
906 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
909 /* Dump statistics to stderr about the memory usage of the line_table
910 set of line maps. This also displays some statistics about macro
911 expansion. */
913 void
914 dump_line_table_statistics (void)
916 struct linemap_stats s;
917 long total_used_map_size,
918 macro_maps_size,
919 total_allocated_map_size;
921 memset (&s, 0, sizeof (s));
923 linemap_get_statistics (line_table, &s);
925 macro_maps_size = s.macro_maps_used_size
926 + s.macro_maps_locations_size;
928 total_allocated_map_size = s.ordinary_maps_allocated_size
929 + s.macro_maps_allocated_size
930 + s.macro_maps_locations_size;
932 total_used_map_size = s.ordinary_maps_used_size
933 + s.macro_maps_used_size
934 + s.macro_maps_locations_size;
936 fprintf (stderr, "Number of expanded macros: %5ld\n",
937 s.num_expanded_macros);
938 if (s.num_expanded_macros != 0)
939 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
940 s.num_macro_tokens / s.num_expanded_macros);
941 fprintf (stderr,
942 "\nLine Table allocations during the "
943 "compilation process\n");
944 fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
945 SIZE_AMOUNT (s.num_ordinary_maps_used));
946 fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
947 SIZE_AMOUNT (s.ordinary_maps_used_size));
948 fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
949 SIZE_AMOUNT (s.num_ordinary_maps_allocated));
950 fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
951 SIZE_AMOUNT (s.ordinary_maps_allocated_size));
952 fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
953 SIZE_AMOUNT (s.num_macro_maps_used));
954 fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
955 SIZE_AMOUNT (s.macro_maps_used_size));
956 fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
957 SIZE_AMOUNT (s.macro_maps_locations_size));
958 fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
959 SIZE_AMOUNT (macro_maps_size));
960 fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
961 SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
962 fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
963 SIZE_AMOUNT (total_allocated_map_size));
964 fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
965 SIZE_AMOUNT (total_used_map_size));
966 fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
967 SIZE_AMOUNT (s.adhoc_table_size));
968 fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
969 SIZE_AMOUNT (s.adhoc_table_entries_used));
970 fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
971 SIZE_AMOUNT (line_table->num_optimized_ranges));
972 fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
973 SIZE_AMOUNT (line_table->num_unoptimized_ranges));
975 fprintf (stderr, "\n");
978 /* Get location one beyond the final location in ordinary map IDX. */
980 static location_t
981 get_end_location (struct line_maps *set, unsigned int idx)
983 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
984 return set->highest_location;
986 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
987 return MAP_START_LOCATION (next_map);
/* Helper function for write_digit_row.  Write to STREAM the decimal
   digit in the units position of DIGIT.

   The reduction modulo 10 is done on the unsigned representation of
   DIGIT, so that a character in '0'..'9' is written even when DIGIT
   is negative (the remainder of a signed division would be negative
   then, yielding a character below '0').  */

static void
write_digit (FILE *stream, int digit)
{
  fputc ('0' + (int) ((unsigned int) digit % 10u), stream);
}
998 /* Helper function for dump_location_info.
999 Write a row of numbers to STREAM, numbering a source line,
1000 giving the units, tens, hundreds etc of the column number. */
1002 static void
1003 write_digit_row (FILE *stream, int indent,
1004 const line_map_ordinary *map,
1005 location_t loc, int max_col, int divisor)
1007 fprintf (stream, "%*c", indent, ' ');
1008 fprintf (stream, "|");
1009 for (int column = 1; column < max_col; column++)
1011 location_t column_loc = loc + (column << map->m_range_bits);
1012 write_digit (stream, column_loc / divisor);
1014 fprintf (stream, "\n");
1017 /* Write a half-closed (START) / half-open (END) interval of
1018 location_t to STREAM. */
1020 static void
1021 dump_location_range (FILE *stream,
1022 location_t start, location_t end)
1024 fprintf (stream,
1025 " location_t interval: %u <= loc < %u\n",
1026 start, end);
1029 /* Write a labelled description of a half-closed (START) / half-open (END)
1030 interval of location_t to STREAM. */
1032 static void
1033 dump_labelled_location_range (FILE *stream,
1034 const char *name,
1035 location_t start, location_t end)
1037 fprintf (stream, "%s\n", name);
1038 dump_location_range (stream, start, end);
1039 fprintf (stream, "\n");
1042 /* Write a visualization of the locations in the line_table to STREAM. */
1044 void
1045 dump_location_info (FILE *stream)
1047 /* Visualize the reserved locations. */
1048 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1049 0, RESERVED_LOCATION_COUNT);
1051 /* Visualize the ordinary line_map instances, rendering the sources. */
1052 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1054 location_t end_location = get_end_location (line_table, idx);
1055 /* half-closed: doesn't include this one. */
1057 const line_map_ordinary *map
1058 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1059 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1060 dump_location_range (stream,
1061 MAP_START_LOCATION (map), end_location);
1062 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1063 fprintf (stream, " starting at line: %i\n",
1064 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1065 fprintf (stream, " column and range bits: %i\n",
1066 map->m_column_and_range_bits);
1067 fprintf (stream, " column bits: %i\n",
1068 map->m_column_and_range_bits - map->m_range_bits);
1069 fprintf (stream, " range bits: %i\n",
1070 map->m_range_bits);
1071 const char * reason;
1072 switch (map->reason) {
1073 case LC_ENTER:
1074 reason = "LC_ENTER";
1075 break;
1076 case LC_LEAVE:
1077 reason = "LC_LEAVE";
1078 break;
1079 case LC_RENAME:
1080 reason = "LC_RENAME";
1081 break;
1082 case LC_RENAME_VERBATIM:
1083 reason = "LC_RENAME_VERBATIM";
1084 break;
1085 case LC_ENTER_MACRO:
1086 reason = "LC_RENAME_MACRO";
1087 break;
1088 default:
1089 reason = "Unknown";
1091 fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
1093 const line_map_ordinary *includer_map
1094 = linemap_included_from_linemap (line_table, map);
1095 fprintf (stream, " included from location: %d",
1096 linemap_included_from (map));
1097 if (includer_map) {
1098 fprintf (stream, " (in ordinary map %d)",
1099 int (includer_map - line_table->info_ordinary.maps));
1101 fprintf (stream, "\n");
1103 /* Render the span of source lines that this "map" covers. */
1104 for (location_t loc = MAP_START_LOCATION (map);
1105 loc < end_location;
1106 loc += (1 << map->m_range_bits) )
1108 gcc_assert (pure_location_p (line_table, loc) );
1110 expanded_location exploc
1111 = linemap_expand_location (line_table, map, loc);
1113 if (exploc.column == 0)
1115 /* Beginning of a new source line: draw the line. */
1117 char_span line_text = location_get_source_line (exploc.file,
1118 exploc.line);
1119 if (!line_text)
1120 break;
1121 fprintf (stream,
1122 "%s:%3i|loc:%5i|%.*s\n",
1123 exploc.file, exploc.line,
1124 loc,
1125 (int)line_text.length (), line_text.get_buffer ());
1127 /* "loc" is at column 0, which means "the whole line".
1128 Render the locations *within* the line, by underlining
1129 it, showing the location_t numeric values
1130 at each column. */
1131 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1132 if (max_col > line_text.length ())
1133 max_col = line_text.length () + 1;
1135 int len_lnum = num_digits (exploc.line);
1136 if (len_lnum < 3)
1137 len_lnum = 3;
1138 int len_loc = num_digits (loc);
1139 if (len_loc < 5)
1140 len_loc = 5;
1142 int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1144 /* Thousands. */
1145 if (end_location > 999)
1146 write_digit_row (stream, indent, map, loc, max_col, 1000);
1148 /* Hundreds. */
1149 if (end_location > 99)
1150 write_digit_row (stream, indent, map, loc, max_col, 100);
1152 /* Tens. */
1153 write_digit_row (stream, indent, map, loc, max_col, 10);
1155 /* Units. */
1156 write_digit_row (stream, indent, map, loc, max_col, 1);
1159 fprintf (stream, "\n");
1162 /* Visualize unallocated values. */
1163 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1164 line_table->highest_location,
1165 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1167 /* Visualize the macro line_map instances, rendering the sources. */
1168 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1170 /* Each macro map that is allocated owns location_t values
1171 that are *lower* that the one before them.
1172 Hence it's meaningful to view them either in order of ascending
1173 source locations, or in order of ascending macro map index. */
1174 const bool ascending_location_ts = true;
1175 unsigned int idx = (ascending_location_ts
1176 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1177 : i);
1178 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1179 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1180 idx,
1181 linemap_map_get_macro_name (map),
1182 MACRO_MAP_NUM_MACRO_TOKENS (map));
1183 dump_location_range (stream,
1184 map->start_location,
1185 (map->start_location
1186 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1187 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1188 "expansion point is location %i",
1189 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1190 fprintf (stream, " map->start_location: %u\n",
1191 map->start_location);
1193 fprintf (stream, " macro_locations:\n");
1194 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1196 location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1197 location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1199 /* linemap_add_macro_token encodes token numbers in an expansion
1200 by putting them after MAP_START_LOCATION. */
1202 /* I'm typically seeing 4 uninitialized entries at the end of
1203 0xafafafaf.
1204 This appears to be due to macro.c:replace_args
1205 adding 2 extra args for padding tokens; presumably there may
1206 be a leading and/or trailing padding token injected,
1207 each for 2 more location slots.
1208 This would explain there being up to 4 location_ts slots
1209 that may be uninitialized. */
1211 fprintf (stream, " %u: %u, %u\n",
1215 if (x == y)
1217 if (x < MAP_START_LOCATION (map))
1218 inform (x, "token %u has x-location == y-location == %u", i, x);
1219 else
1220 fprintf (stream,
1221 "x-location == y-location == %u encodes token # %u\n",
1222 x, x - MAP_START_LOCATION (map));
1224 else
1226 inform (x, "token %u has x-location == %u", i, x);
1227 inform (x, "token %u has y-location == %u", i, y);
1230 fprintf (stream, "\n");
1233 /* It appears that MAX_LOCATION_T itself is never assigned to a
1234 macro map, presumably due to an off-by-one error somewhere
1235 between the logic in linemap_enter_macro and
1236 LINEMAPS_MACRO_LOWEST_LOCATION. */
1237 dump_labelled_location_range (stream, "MAX_LOCATION_T",
1238 MAX_LOCATION_T,
1239 MAX_LOCATION_T + 1);
1241 /* Visualize ad-hoc values. */
1242 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1243 MAX_LOCATION_T + 1, UINT_MAX);
1246 /* string_concat's constructor. */
1248 string_concat::string_concat (int num, location_t *locs)
1249 : m_num (num)
1251 m_locs = ggc_vec_alloc <location_t> (num);
1252 for (int i = 0; i < num; i++)
1253 m_locs[i] = locs[i];
1256 /* string_concat_db's constructor. */
1258 string_concat_db::string_concat_db ()
1260 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1263 /* Record that a string concatenation occurred, covering NUM
1264 string literal tokens. LOCS is an array of size NUM, containing the
1265 locations of the tokens. A copy of LOCS is taken. */
1267 void
1268 string_concat_db::record_string_concatenation (int num, location_t *locs)
1270 gcc_assert (num > 1);
1271 gcc_assert (locs);
1273 location_t key_loc = get_key_loc (locs[0]);
1275 string_concat *concat
1276 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1277 m_table->put (key_loc, concat);
1280 /* Determine if LOC was the location of the the initial token of a
1281 concatenation of string literal tokens.
1282 If so, *OUT_NUM is written to with the number of tokens, and
1283 *OUT_LOCS with the location of an array of locations of the
1284 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1285 storage owned by the string_concat_db.
1286 Otherwise, return false. */
1288 bool
1289 string_concat_db::get_string_concatenation (location_t loc,
1290 int *out_num,
1291 location_t **out_locs)
1293 gcc_assert (out_num);
1294 gcc_assert (out_locs);
1296 location_t key_loc = get_key_loc (loc);
1298 string_concat **concat = m_table->get (key_loc);
1299 if (!concat)
1300 return false;
1302 *out_num = (*concat)->m_num;
1303 *out_locs =(*concat)->m_locs;
1304 return true;
1307 /* Internal function. Canonicalize LOC into a form suitable for
1308 use as a key within the database, stripping away macro expansion,
1309 ad-hoc information, and range information, using the location of
1310 the start of LOC within an ordinary linemap. */
1312 location_t
1313 string_concat_db::get_key_loc (location_t loc)
1315 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1316 NULL);
1318 loc = get_range_from_loc (line_table, loc).m_start;
1320 return loc;
1323 /* Helper class for use within get_substring_ranges_for_loc.
1324 An vec of cpp_string with responsibility for releasing all of the
1325 str->text for each str in the vector. */
1327 class auto_cpp_string_vec : public auto_vec <cpp_string>
1329 public:
1330 auto_cpp_string_vec (int alloc)
1331 : auto_vec <cpp_string> (alloc) {}
1333 ~auto_cpp_string_vec ()
1335 /* Clean up the copies within this vec. */
1336 int i;
1337 cpp_string *str;
1338 FOR_EACH_VEC_ELT (*this, i, str)
1339 free (const_cast <unsigned char *> (str->text));
1343 /* Attempt to populate RANGES with source location information on the
1344 individual characters within the string literal found at STRLOC.
1345 If CONCATS is non-NULL, then any string literals that the token at
1346 STRLOC was concatenated with are also added to RANGES.
1348 Return NULL if successful, or an error message if any errors occurred (in
1349 which case RANGES may be only partially populated and should not
1350 be used).
1352 This is implemented by re-parsing the relevant source line(s). */
1354 static const char *
1355 get_substring_ranges_for_loc (cpp_reader *pfile,
1356 string_concat_db *concats,
1357 location_t strloc,
1358 enum cpp_ttype type,
1359 cpp_substring_ranges &ranges)
1361 gcc_assert (pfile);
1363 if (strloc == UNKNOWN_LOCATION)
1364 return "unknown location";
1366 /* Reparsing the strings requires accurate location information.
1367 If -ftrack-macro-expansion has been overridden from its default
1368 of 2, then we might have a location of a macro expansion point,
1369 rather than the location of the literal itself.
1370 Avoid this by requiring that we have full macro expansion tracking
1371 for substring locations to be available. */
1372 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1373 return "track_macro_expansion != 2";
1375 /* If #line or # 44 "file"-style directives are present, then there's
1376 no guarantee that the line numbers we have can be used to locate
1377 the strings. For example, we might have a .i file with # directives
1378 pointing back to lines within a .c file, but the .c file might
1379 have been edited since the .i file was created.
1380 In such a case, the safest course is to disable on-demand substring
1381 locations. */
1382 if (line_table->seen_line_directive)
1383 return "seen line directive";
1385 /* If string concatenation has occurred at STRLOC, get the locations
1386 of all of the literal tokens making up the compound string.
1387 Otherwise, just use STRLOC. */
1388 int num_locs = 1;
1389 location_t *strlocs = &strloc;
1390 if (concats)
1391 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1393 auto_cpp_string_vec strs (num_locs);
1394 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1395 for (int i = 0; i < num_locs; i++)
1397 /* Get range of strloc. We will use it to locate the start and finish
1398 of the literal token within the line. */
1399 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1401 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1403 /* If the string token was within a macro expansion, then we can
1404 cope with it for the simple case where we have a single token.
1405 Otherwise, bail out. */
1406 if (src_range.m_start != src_range.m_finish)
1407 return "macro expansion";
1409 else
1411 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1412 /* If so, we can't reliably determine where the token started within
1413 its line. */
1414 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1416 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1417 /* If so, we can't reliably determine where the token finished
1418 within its line. */
1419 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1422 expanded_location start
1423 = expand_location_to_spelling_point (src_range.m_start,
1424 LOCATION_ASPECT_START);
1425 expanded_location finish
1426 = expand_location_to_spelling_point (src_range.m_finish,
1427 LOCATION_ASPECT_FINISH);
1428 if (start.file != finish.file)
1429 return "range endpoints are in different files";
1430 if (start.line != finish.line)
1431 return "range endpoints are on different lines";
1432 if (start.column > finish.column)
1433 return "range endpoints are reversed";
1435 char_span line = location_get_source_line (start.file, start.line);
1436 if (!line)
1437 return "unable to read source line";
1439 /* Determine the location of the literal (including quotes
1440 and leading prefix chars, such as the 'u' in a u""
1441 token). */
1442 size_t literal_length = finish.column - start.column + 1;
1444 /* Ensure that we don't crash if we got the wrong location. */
1445 if (line.length () < (start.column - 1 + literal_length))
1446 return "line is not wide enough";
1448 char_span literal = line.subspan (start.column - 1, literal_length);
1450 cpp_string from;
1451 from.len = literal_length;
1452 /* Make a copy of the literal, to avoid having to rely on
1453 the lifetime of the copy of the line within the cache.
1454 This will be released by the auto_cpp_string_vec dtor. */
1455 from.text = (unsigned char *)literal.xstrdup ();
1456 strs.safe_push (from);
1458 /* For very long lines, a new linemap could have started
1459 halfway through the token.
1460 Ensure that the loc_reader uses the linemap of the
1461 *end* of the token for its start location. */
1462 const line_map_ordinary *start_ord_map;
1463 linemap_resolve_location (line_table, src_range.m_start,
1464 LRK_SPELLING_LOCATION, &start_ord_map);
1465 const line_map_ordinary *final_ord_map;
1466 linemap_resolve_location (line_table, src_range.m_finish,
1467 LRK_SPELLING_LOCATION, &final_ord_map);
1468 if (start_ord_map == NULL || final_ord_map == NULL)
1469 return "failed to get ordinary maps";
1470 /* Bulletproofing. We ought to only have different ordinary maps
1471 for start vs finish due to line-length jumps. */
1472 if (start_ord_map != final_ord_map
1473 && start_ord_map->to_file != final_ord_map->to_file)
1474 return "start and finish are spelled in different ordinary maps";
1475 location_t start_loc
1476 = linemap_position_for_line_and_column (line_table, final_ord_map,
1477 start.line, start.column);
1479 cpp_string_location_reader loc_reader (start_loc, line_table);
1480 loc_readers.safe_push (loc_reader);
1483 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1484 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1485 loc_readers.address (),
1486 num_locs, &ranges, type);
1487 if (err)
1488 return err;
1490 /* Success: "ranges" should now contain information on the string. */
1491 return NULL;
1494 /* Attempt to populate *OUT_LOC with source location information on the
1495 given characters within the string literal found at STRLOC.
1496 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1497 character set.
1499 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1500 and string literal "012345\n789"
1501 *OUT_LOC is written to with:
1502 "012345\n789"
1503 ~^~~~~
1505 If CONCATS is non-NULL, then any string literals that the token at
1506 STRLOC was concatenated with are also considered.
1508 This is implemented by re-parsing the relevant source line(s).
1510 Return NULL if successful, or an error message if any errors occurred.
1511 Error messages are intended for GCC developers (to help debugging) rather
1512 than for end-users. */
1514 const char *
1515 get_location_within_string (cpp_reader *pfile,
1516 string_concat_db *concats,
1517 location_t strloc,
1518 enum cpp_ttype type,
1519 int caret_idx, int start_idx, int end_idx,
1520 location_t *out_loc)
1522 gcc_checking_assert (caret_idx >= 0);
1523 gcc_checking_assert (start_idx >= 0);
1524 gcc_checking_assert (end_idx >= 0);
1525 gcc_assert (out_loc);
1527 cpp_substring_ranges ranges;
1528 const char *err
1529 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1530 if (err)
1531 return err;
1533 if (caret_idx >= ranges.get_num_ranges ())
1534 return "caret_idx out of range";
1535 if (start_idx >= ranges.get_num_ranges ())
1536 return "start_idx out of range";
1537 if (end_idx >= ranges.get_num_ranges ())
1538 return "end_idx out of range";
1540 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1541 ranges.get_range (start_idx).m_start,
1542 ranges.get_range (end_idx).m_finish);
1543 return NULL;
1546 #if CHECKING_P
1548 namespace selftest {
1550 /* Selftests of location handling. */
1552 /* Attempt to populate *OUT_RANGE with source location information on the
1553 given character within the string literal found at STRLOC.
1554 CHAR_IDX refers to an offset within the execution character set.
1555 If CONCATS is non-NULL, then any string literals that the token at
1556 STRLOC was concatenated with are also considered.
1558 This is implemented by re-parsing the relevant source line(s).
1560 Return NULL if successful, or an error message if any errors occurred.
1561 Error messages are intended for GCC developers (to help debugging) rather
1562 than for end-users. */
1564 static const char *
1565 get_source_range_for_char (cpp_reader *pfile,
1566 string_concat_db *concats,
1567 location_t strloc,
1568 enum cpp_ttype type,
1569 int char_idx,
1570 source_range *out_range)
1572 gcc_checking_assert (char_idx >= 0);
1573 gcc_assert (out_range);
1575 cpp_substring_ranges ranges;
1576 const char *err
1577 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1578 if (err)
1579 return err;
1581 if (char_idx >= ranges.get_num_ranges ())
1582 return "char_idx out of range";
1584 *out_range = ranges.get_range (char_idx);
1585 return NULL;
1588 /* As get_source_range_for_char, but write to *OUT the number
1589 of ranges that are available. */
1591 static const char *
1592 get_num_source_ranges_for_substring (cpp_reader *pfile,
1593 string_concat_db *concats,
1594 location_t strloc,
1595 enum cpp_ttype type,
1596 int *out)
1598 gcc_assert (out);
1600 cpp_substring_ranges ranges;
1601 const char *err
1602 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1604 if (err)
1605 return err;
1607 *out = ranges.get_num_ranges ();
1608 return NULL;
1611 /* Selftests of location handling. */
1613 /* Verify that compare() on linenum_type handles comparisons over the full
1614 range of the type. */
1616 static void
1617 test_linenum_comparisons ()
1619 linenum_type min_line (0);
1620 linenum_type max_line (0xffffffff);
1621 ASSERT_EQ (0, compare (min_line, min_line));
1622 ASSERT_EQ (0, compare (max_line, max_line));
1624 ASSERT_GT (compare (max_line, min_line), 0);
1625 ASSERT_LT (compare (min_line, max_line), 0);
1628 /* Helper function for verifying location data: when location_t
1629 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1630 as having column 0. */
1632 static bool
1633 should_have_column_data_p (location_t loc)
1635 if (IS_ADHOC_LOC (loc))
1636 loc = get_location_from_adhoc_loc (line_table, loc);
1637 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1638 return false;
1639 return true;
1642 /* Selftest for should_have_column_data_p. */
1644 static void
1645 test_should_have_column_data_p ()
1647 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1648 ASSERT_TRUE
1649 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1650 ASSERT_FALSE
1651 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1654 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1655 on LOC. */
1657 static void
1658 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1659 location_t loc)
1661 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1662 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1663 /* If location_t values are sufficiently high, then column numbers
1664 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1665 When close to the threshold, column numbers *may* be present: if
1666 the final linemap before the threshold contains a line that straddles
1667 the threshold, locations in that line have column information. */
1668 if (should_have_column_data_p (loc))
1669 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for location_t beyond which line-map.c changes
     behavior (disabling of the range-packing optimization, disabling
     of column-tracking).  We can exercise these by starting the line_table
     at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix: the default range bits to use, and the location_t value at
   which the line table should start (0 for "don't override").  */

struct line_table_case
{
  line_table_case (int default_range_bits, int base_location)
  {
    m_default_range_bits = default_range_bits;
    m_base_location = base_location;
  }

  int m_default_range_bits;
  int m_base_location;
};
1699 /* Constructor. Store the old value of line_table, and create a new
1700 one, using sane defaults. */
1702 line_table_test::line_table_test ()
1704 gcc_assert (saved_line_table == NULL);
1705 saved_line_table = line_table;
1706 line_table = ggc_alloc<line_maps> ();
1707 linemap_init (line_table, BUILTINS_LOCATION);
1708 gcc_assert (saved_line_table->reallocator);
1709 line_table->reallocator = saved_line_table->reallocator;
1710 gcc_assert (saved_line_table->round_alloc_size);
1711 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1712 line_table->default_range_bits = 0;
1715 /* Constructor. Store the old value of line_table, and create a new
1716 one, using the sitation described in CASE_. */
1718 line_table_test::line_table_test (const line_table_case &case_)
1720 gcc_assert (saved_line_table == NULL);
1721 saved_line_table = line_table;
1722 line_table = ggc_alloc<line_maps> ();
1723 linemap_init (line_table, BUILTINS_LOCATION);
1724 gcc_assert (saved_line_table->reallocator);
1725 line_table->reallocator = saved_line_table->reallocator;
1726 gcc_assert (saved_line_table->round_alloc_size);
1727 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1728 line_table->default_range_bits = case_.m_default_range_bits;
1729 if (case_.m_base_location)
1731 line_table->highest_location = case_.m_base_location;
1732 line_table->highest_line = case_.m_base_location;
1736 /* Destructor. Restore the old value of line_table. */
1738 line_table_test::~line_table_test ()
1740 gcc_assert (saved_line_table != NULL);
1741 line_table = saved_line_table;
1742 saved_line_table = NULL;
1745 /* Verify basic operation of ordinary linemaps. */
1747 static void
1748 test_accessing_ordinary_linemaps (const line_table_case &case_)
1750 line_table_test ltt (case_);
1752 /* Build a simple linemap describing some locations. */
1753 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1755 linemap_line_start (line_table, 1, 100);
1756 location_t loc_a = linemap_position_for_column (line_table, 1);
1757 location_t loc_b = linemap_position_for_column (line_table, 23);
1759 linemap_line_start (line_table, 2, 100);
1760 location_t loc_c = linemap_position_for_column (line_table, 1);
1761 location_t loc_d = linemap_position_for_column (line_table, 17);
1763 /* Example of a very long line. */
1764 linemap_line_start (line_table, 3, 2000);
1765 location_t loc_e = linemap_position_for_column (line_table, 700);
1767 /* Transitioning back to a short line. */
1768 linemap_line_start (line_table, 4, 0);
1769 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1771 if (should_have_column_data_p (loc_back_to_short))
1773 /* Verify that we switched to short lines in the linemap. */
1774 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1775 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1778 /* Example of a line that will eventually be seen to be longer
1779 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1780 below that. */
1781 linemap_line_start (line_table, 5, 2000);
1783 location_t loc_start_of_very_long_line
1784 = linemap_position_for_column (line_table, 2000);
1785 location_t loc_too_wide
1786 = linemap_position_for_column (line_table, 4097);
1787 location_t loc_too_wide_2
1788 = linemap_position_for_column (line_table, 4098);
1790 /* ...and back to a sane line length. */
1791 linemap_line_start (line_table, 6, 100);
1792 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1794 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1796 /* Multiple files. */
1797 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1798 linemap_line_start (line_table, 1, 200);
1799 location_t loc_f = linemap_position_for_column (line_table, 150);
1800 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1802 /* Verify that we can recover the location info. */
1803 assert_loceq ("foo.c", 1, 1, loc_a);
1804 assert_loceq ("foo.c", 1, 23, loc_b);
1805 assert_loceq ("foo.c", 2, 1, loc_c);
1806 assert_loceq ("foo.c", 2, 17, loc_d);
1807 assert_loceq ("foo.c", 3, 700, loc_e);
1808 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1810 /* In the very wide line, the initial location should be fully tracked. */
1811 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1812 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1813 be disabled. */
1814 assert_loceq ("foo.c", 5, 0, loc_too_wide);
1815 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1816 /*...and column-tracking should be re-enabled for subsequent lines. */
1817 assert_loceq ("foo.c", 6, 10, loc_sane_again);
1819 assert_loceq ("bar.c", 1, 150, loc_f);
1821 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1822 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1824 /* Verify using make_location to build a range, and extracting data
1825 back from it. */
1826 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1827 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1828 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1829 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1830 ASSERT_EQ (loc_b, src_range.m_start);
1831 ASSERT_EQ (loc_d, src_range.m_finish);
1834 /* Verify various properties of UNKNOWN_LOCATION. */
1836 static void
1837 test_unknown_location ()
1839 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1840 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1841 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1844 /* Verify various properties of BUILTINS_LOCATION. */
1846 static void
1847 test_builtins ()
1849 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1850 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1853 /* Regression test for make_location.
1854 Ensure that we use pure locations for the start/finish of the range,
1855 rather than storing a packed or ad-hoc range as the start/finish. */
1857 static void
1858 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1860 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1861 with C++ frontend.
1862 ....................0000000001111111111222.
1863 ....................1234567890123456789012. */
1864 const char *content = " r += !aaa == bbb;\n";
1865 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1866 line_table_test ltt (case_);
1867 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1869 const location_t c11 = linemap_position_for_column (line_table, 11);
1870 const location_t c12 = linemap_position_for_column (line_table, 12);
1871 const location_t c13 = linemap_position_for_column (line_table, 13);
1872 const location_t c14 = linemap_position_for_column (line_table, 14);
1873 const location_t c21 = linemap_position_for_column (line_table, 21);
1875 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1876 return;
1878 /* Use column 13 for the caret location, arbitrarily, to verify that we
1879 handle start != caret. */
1880 const location_t aaa = make_location (c13, c12, c14);
1881 ASSERT_EQ (c13, get_pure_location (aaa));
1882 ASSERT_EQ (c12, get_start (aaa));
1883 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1884 ASSERT_EQ (c14, get_finish (aaa));
1885 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1887 /* Make a location using a location with a range as the start-point. */
1888 const location_t not_aaa = make_location (c11, aaa, c14);
1889 ASSERT_EQ (c11, get_pure_location (not_aaa));
1890 /* It should use the start location of the range, not store the range
1891 itself. */
1892 ASSERT_EQ (c12, get_start (not_aaa));
1893 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1894 ASSERT_EQ (c14, get_finish (not_aaa));
1895 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1897 /* Similarly, make a location with a range as the end-point. */
1898 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1899 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1900 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1901 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1902 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1903 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1904 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1905 /* It should use the finish location of the range, not store the range
1906 itself. */
1907 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1908 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1909 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1910 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1911 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1914 /* Verify reading of input files (e.g. for caret-based diagnostics). */
1916 static void
1917 test_reading_source_line ()
1919 /* Create a tempfile and write some text to it. */
1920 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1921 "01234567890123456789\n"
1922 "This is the test text\n"
1923 "This is the 3rd line");
1925 /* Read back a specific line from the tempfile. */
1926 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
1927 ASSERT_TRUE (source_line);
1928 ASSERT_TRUE (source_line.get_buffer () != NULL);
1929 ASSERT_EQ (20, source_line.length ());
1930 ASSERT_TRUE (!strncmp ("This is the 3rd line",
1931 source_line.get_buffer (), source_line.length ()));
1933 source_line = location_get_source_line (tmp.get_filename (), 2);
1934 ASSERT_TRUE (source_line);
1935 ASSERT_TRUE (source_line.get_buffer () != NULL);
1936 ASSERT_EQ (21, source_line.length ());
1937 ASSERT_TRUE (!strncmp ("This is the test text",
1938 source_line.get_buffer (), source_line.length ()));
1940 source_line = location_get_source_line (tmp.get_filename (), 4);
1941 ASSERT_FALSE (source_line);
1942 ASSERT_TRUE (source_line.get_buffer () == NULL);
1945 /* Tests of lexing. */
/* Verify that the text of token TOK, as given by cpp_token_as_text
   for PARSER, is equal to the string EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));    \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);           \
  SELFTEST_END_STMT
1956 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1957 and ranges from EXP_START_COL to EXP_FINISH_COL.
1958 Use LOC as the effective location of the selftest. */
1960 static void
1961 assert_token_loc_eq (const location &loc,
1962 const cpp_token *tok,
1963 const char *exp_filename, int exp_linenum,
1964 int exp_start_col, int exp_finish_col)
1966 location_t tok_loc = tok->src_loc;
1967 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1968 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1970 /* If location_t values are sufficiently high, then column numbers
1971 will be unavailable. */
1972 if (!should_have_column_data_p (tok_loc))
1973 return;
1975 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1976 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1977 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1978 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
/* Wrapper around assert_token_loc_eq for checking TOK->src_loc, which
   passes SELFTEST_LOCATION so that failures are reported at the point
   where the macro is used.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,              \
                            EXP_START_COL, EXP_FINISH_COL)               \
  assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME),         \
                       (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1989 /* Test of lexing a file using libcpp, verifying tokens and their
1990 location information. */
1992 static void
1993 test_lexer (const line_table_case &case_)
1995 /* Create a tempfile and write some text to it. */
1996 const char *content =
1997 /*00000000011111111112222222222333333.3333444444444.455555555556
1998 12345678901234567890123456789012345.6789012345678.901234567890. */
1999 ("test_name /* c-style comment */\n"
2000 " \"test literal\"\n"
2001 " // test c++-style comment\n"
2002 " 42\n");
2003 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2005 line_table_test ltt (case_);
2007 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2009 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2010 ASSERT_NE (fname, NULL);
2012 /* Verify that we get the expected tokens back, with the correct
2013 location information. */
2015 location_t loc;
2016 const cpp_token *tok;
2017 tok = cpp_get_token_with_location (parser, &loc);
2018 ASSERT_NE (tok, NULL);
2019 ASSERT_EQ (tok->type, CPP_NAME);
2020 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2021 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2023 tok = cpp_get_token_with_location (parser, &loc);
2024 ASSERT_NE (tok, NULL);
2025 ASSERT_EQ (tok->type, CPP_STRING);
2026 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2027 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2029 tok = cpp_get_token_with_location (parser, &loc);
2030 ASSERT_NE (tok, NULL);
2031 ASSERT_EQ (tok->type, CPP_NUMBER);
2032 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2033 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2035 tok = cpp_get_token_with_location (parser, &loc);
2036 ASSERT_NE (tok, NULL);
2037 ASSERT_EQ (tok->type, CPP_EOF);
2039 cpp_finish (parser, NULL);
2040 cpp_destroy (parser);
2043 /* Forward decls. */
2045 struct lexer_test;
2046 class lexer_test_options;
2048 /* A class for specifying options of a lexer_test.
2049 The "apply" vfunc is called during the lexer_test constructor. */
2051 class lexer_test_options
2053 public:
2054 virtual void apply (lexer_test &) = 0;
2057 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2058 in its dtor.
2060 This is needed by struct lexer_test to ensure that the cleanup of the
2061 cpp_reader happens *after* the cleanup of the temp_source_file. */
2063 class cpp_reader_ptr
2065 public:
2066 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2068 ~cpp_reader_ptr ()
2070 cpp_finish (m_ptr, NULL);
2071 cpp_destroy (m_ptr);
2074 operator cpp_reader * () const { return m_ptr; }
2076 private:
2077 cpp_reader *m_ptr;
2080 /* A struct for writing lexer tests. */
2082 struct lexer_test
2084 lexer_test (const line_table_case &case_, const char *content,
2085 lexer_test_options *options);
2086 ~lexer_test ();
2088 const cpp_token *get_token ();
2090 /* The ordering of these fields matters.
2091 The line_table_test must be first, since the cpp_reader_ptr
2092 uses it.
2093 The cpp_reader must be cleaned up *after* the temp_source_file
2094 since the filenames in input.c's input cache are owned by the
2095 cpp_reader; in particular, when ~temp_source_file evicts the
2096 filename the filenames must still be alive. */
2097 line_table_test m_ltt;
2098 cpp_reader_ptr m_parser;
2099 temp_source_file m_tempfile;
2100 string_concat_db m_concats;
2101 bool m_implicitly_expect_EOF;
2104 /* Use an EBCDIC encoding for the execution charset, specifically
2105 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2107 This exercises iconv integration within libcpp.
2108 Not every build of iconv supports the given charset,
2109 so we need to flag this error and handle it gracefully. */
2111 class ebcdic_execution_charset : public lexer_test_options
2113 public:
2114 ebcdic_execution_charset () : m_num_iconv_errors (0)
2116 gcc_assert (s_singleton == NULL);
2117 s_singleton = this;
2119 ~ebcdic_execution_charset ()
2121 gcc_assert (s_singleton == this);
2122 s_singleton = NULL;
2125 void apply (lexer_test &test) FINAL OVERRIDE
2127 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2128 cpp_opts->narrow_charset = "IBM1047";
2130 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2131 callbacks->diagnostic = on_diagnostic;
2134 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2135 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2136 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2137 rich_location *richloc ATTRIBUTE_UNUSED,
2138 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2139 ATTRIBUTE_FPTR_PRINTF(5,0)
2141 gcc_assert (s_singleton);
2142 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2143 const char *msg = "conversion from %s to %s not supported by iconv";
2144 #ifdef ENABLE_NLS
2145 msg = dgettext ("cpplib", msg);
2146 #endif
2147 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2148 when the local iconv build doesn't support the conversion. */
2149 if (strcmp (msgid, msg) == 0)
2151 s_singleton->m_num_iconv_errors++;
2152 return true;
2155 /* Otherwise, we have an unexpected error. */
2156 abort ();
2159 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2161 private:
2162 static ebcdic_execution_charset *s_singleton;
2163 int m_num_iconv_errors;
2166 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2168 /* A lexer_test_options subclass that records a list of diagnostic
2169 messages emitted by the lexer. */
2171 class lexer_diagnostic_sink : public lexer_test_options
2173 public:
2174 lexer_diagnostic_sink ()
2176 gcc_assert (s_singleton == NULL);
2177 s_singleton = this;
2179 ~lexer_diagnostic_sink ()
2181 gcc_assert (s_singleton == this);
2182 s_singleton = NULL;
2184 int i;
2185 char *str;
2186 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2187 free (str);
2190 void apply (lexer_test &test) FINAL OVERRIDE
2192 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2193 callbacks->diagnostic = on_diagnostic;
2196 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2197 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2198 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2199 rich_location *richloc ATTRIBUTE_UNUSED,
2200 const char *msgid, va_list *ap)
2201 ATTRIBUTE_FPTR_PRINTF(5,0)
2203 char *msg = xvasprintf (msgid, *ap);
2204 s_singleton->m_diagnostics.safe_push (msg);
2205 return true;
2208 auto_vec<char *> m_diagnostics;
2210 private:
2211 static lexer_diagnostic_sink *s_singleton;
2214 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2216 /* Constructor. Override line_table with a new instance based on CASE_,
2217 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2218 start parsing the tempfile. */
2220 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2221 lexer_test_options *options)
2222 : m_ltt (case_),
2223 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2224 /* Create a tempfile and write the text to it. */
2225 m_tempfile (SELFTEST_LOCATION, ".c", content),
2226 m_concats (),
2227 m_implicitly_expect_EOF (true)
2229 if (options)
2230 options->apply (*this);
2232 cpp_init_iconv (m_parser);
2234 /* Parse the file. */
2235 const char *fname = cpp_read_main_file (m_parser,
2236 m_tempfile.get_filename ());
2237 ASSERT_NE (fname, NULL);
2240 /* Destructor. By default, verify that the next token in m_parser is EOF. */
2242 lexer_test::~lexer_test ()
2244 location_t loc;
2245 const cpp_token *tok;
2247 if (m_implicitly_expect_EOF)
2249 tok = cpp_get_token_with_location (m_parser, &loc);
2250 ASSERT_NE (tok, NULL);
2251 ASSERT_EQ (tok->type, CPP_EOF);
2255 /* Get the next token from m_parser. */
2257 const cpp_token *
2258 lexer_test::get_token ()
2260 location_t loc;
2261 const cpp_token *tok;
2263 tok = cpp_get_token_with_location (m_parser, &loc);
2264 ASSERT_NE (tok, NULL);
2265 return tok;
2268 /* Verify that locations within string literals are correctly handled. */
2270 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2271 using the string concatenation database for TEST.
2273 Assert that the character at index IDX is on EXPECTED_LINE,
2274 and that it begins at column EXPECTED_START_COL and ends at
2275 EXPECTED_FINISH_COL (unless the locations are beyond
2276 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2277 columns). */
2279 static void
2280 assert_char_at_range (const location &loc,
2281 lexer_test& test,
2282 location_t strloc, enum cpp_ttype type, int idx,
2283 int expected_line, int expected_start_col,
2284 int expected_finish_col)
2286 cpp_reader *pfile = test.m_parser;
2287 string_concat_db *concats = &test.m_concats;
2289 source_range actual_range = source_range();
2290 const char *err
2291 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2292 &actual_range);
2293 if (should_have_column_data_p (strloc))
2294 ASSERT_EQ_AT (loc, NULL, err);
2295 else
2297 ASSERT_STREQ_AT (loc,
2298 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2299 err);
2300 return;
2303 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2304 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2305 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2306 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2308 if (should_have_column_data_p (actual_range.m_start))
2310 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2311 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2313 if (should_have_column_data_p (actual_range.m_finish))
2315 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2316 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
/* Wrapper around assert_char_at_range which passes SELFTEST_LOCATION,
   so that any errors are reported at the point where the macro is
   used.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX,              \
                             EXPECTED_LINE, EXPECTED_START_COL,          \
                             EXPECTED_FINISH_COL)                        \
  assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC),       \
                        (TYPE), (IDX), (EXPECTED_LINE),                  \
                        (EXPECTED_START_COL), (EXPECTED_FINISH_COL))
2329 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2330 using the string concatenation database for TEST.
2332 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2334 static void
2335 assert_num_substring_ranges (const location &loc,
2336 lexer_test& test,
2337 location_t strloc,
2338 enum cpp_ttype type,
2339 int expected_num_ranges)
2341 cpp_reader *pfile = test.m_parser;
2342 string_concat_db *concats = &test.m_concats;
2344 int actual_num_ranges = -1;
2345 const char *err
2346 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2347 &actual_num_ranges);
2348 if (should_have_column_data_p (strloc))
2349 ASSERT_EQ_AT (loc, NULL, err);
2350 else
2352 ASSERT_STREQ_AT (loc,
2353 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2354 err);
2355 return;
2357 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
/* Wrapper around assert_num_substring_ranges which passes
   SELFTEST_LOCATION, so that any errors are reported at the point
   where the macro is used.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,            \
                                    EXPECTED_NUM_RANGES)                 \
  assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST),          \
                               (STRLOC), (TYPE), (EXPECTED_NUM_RANGES))
2369 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2370 returns an error (using the string concatenation database for TEST). */
2372 static void
2373 assert_has_no_substring_ranges (const location &loc,
2374 lexer_test& test,
2375 location_t strloc,
2376 enum cpp_ttype type,
2377 const char *expected_err)
2379 cpp_reader *pfile = test.m_parser;
2380 string_concat_db *concats = &test.m_concats;
2381 cpp_substring_ranges ranges;
2382 const char *actual_err
2383 = get_substring_ranges_for_loc (pfile, concats, strloc,
2384 type, ranges);
2385 if (should_have_column_data_p (strloc))
2386 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2387 else
2388 ASSERT_STREQ_AT (loc,
2389 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2390 actual_err);
/* Wrapper around assert_has_no_substring_ranges which passes
   SELFTEST_LOCATION, so that any errors are reported at the point
   where the macro is used.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)    \
  assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST),       \
                                  (STRLOC), (TYPE), (ERR))
2397 /* Lex a simple string literal. Verify the substring location data, before
2398 and after running cpp_interpret_string on it. */
2400 static void
2401 test_lexer_string_locations_simple (const line_table_case &case_)
2403 /* Digits 0-9 (with 0 at column 10), the simple way.
2404 ....................000000000.11111111112.2222222223333333333
2405 ....................123456789.01234567890.1234567890123456789
2406 We add a trailing comment to ensure that we correctly locate
2407 the end of the string literal token. */
2408 const char *content = " \"0123456789\" /* not a string */\n";
2409 lexer_test test (case_, content, NULL);
2411 /* Verify that we get the expected token back, with the correct
2412 location information. */
2413 const cpp_token *tok = test.get_token ();
2414 ASSERT_EQ (tok->type, CPP_STRING);
2415 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2416 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2418 /* At this point in lexing, the quote characters are treated as part of
2419 the string (they are stripped off by cpp_interpret_string). */
2421 ASSERT_EQ (tok->val.str.len, 12);
2423 /* Verify that cpp_interpret_string works. */
2424 cpp_string dst_string;
2425 const enum cpp_ttype type = CPP_STRING;
2426 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2427 &dst_string, type);
2428 ASSERT_TRUE (result);
2429 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2430 free (const_cast <unsigned char *> (dst_string.text));
2432 /* Verify ranges of individual characters. This no longer includes the
2433 opening quote, but does include the closing quote. */
2434 for (int i = 0; i <= 10; i++)
2435 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2436 10 + i, 10 + i);
2438 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2441 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2442 encoding. */
2444 static void
2445 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2447 /* EBCDIC support requires iconv. */
2448 if (!HAVE_ICONV)
2449 return;
2451 /* Digits 0-9 (with 0 at column 10), the simple way.
2452 ....................000000000.11111111112.2222222223333333333
2453 ....................123456789.01234567890.1234567890123456789
2454 We add a trailing comment to ensure that we correctly locate
2455 the end of the string literal token. */
2456 const char *content = " \"0123456789\" /* not a string */\n";
2457 ebcdic_execution_charset use_ebcdic;
2458 lexer_test test (case_, content, &use_ebcdic);
2460 /* Verify that we get the expected token back, with the correct
2461 location information. */
2462 const cpp_token *tok = test.get_token ();
2463 ASSERT_EQ (tok->type, CPP_STRING);
2464 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2465 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2467 /* At this point in lexing, the quote characters are treated as part of
2468 the string (they are stripped off by cpp_interpret_string). */
2470 ASSERT_EQ (tok->val.str.len, 12);
2472 /* The remainder of the test requires an iconv implementation that
2473 can convert from UTF-8 to the EBCDIC encoding requested above. */
2474 if (use_ebcdic.iconv_errors_occurred_p ())
2475 return;
2477 /* Verify that cpp_interpret_string works. */
2478 cpp_string dst_string;
2479 const enum cpp_ttype type = CPP_STRING;
2480 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2481 &dst_string, type);
2482 ASSERT_TRUE (result);
2483 /* We should now have EBCDIC-encoded text, specifically
2484 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2485 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2486 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2487 (const char *)dst_string.text);
2488 free (const_cast <unsigned char *> (dst_string.text));
2490 /* Verify that we don't attempt to record substring location information
2491 for such cases. */
2492 ASSERT_HAS_NO_SUBSTRING_RANGES
2493 (test, tok->src_loc, type,
2494 "execution character set != source character set");
2497 /* Lex a string literal containing a hex-escaped character.
2498 Verify the substring location data, before and after running
2499 cpp_interpret_string on it. */
2501 static void
2502 test_lexer_string_locations_hex (const line_table_case &case_)
2504 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2505 and with a space in place of digit 6, to terminate the escaped
2506 hex code.
2507 ....................000000000.111111.11112222.
2508 ....................123456789.012345.67890123. */
2509 const char *content = " \"01234\\x35 789\"\n";
2510 lexer_test test (case_, content, NULL);
2512 /* Verify that we get the expected token back, with the correct
2513 location information. */
2514 const cpp_token *tok = test.get_token ();
2515 ASSERT_EQ (tok->type, CPP_STRING);
2516 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2517 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2519 /* At this point in lexing, the quote characters are treated as part of
2520 the string (they are stripped off by cpp_interpret_string). */
2521 ASSERT_EQ (tok->val.str.len, 15);
2523 /* Verify that cpp_interpret_string works. */
2524 cpp_string dst_string;
2525 const enum cpp_ttype type = CPP_STRING;
2526 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2527 &dst_string, type);
2528 ASSERT_TRUE (result);
2529 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2530 free (const_cast <unsigned char *> (dst_string.text));
2532 /* Verify ranges of individual characters. This no longer includes the
2533 opening quote, but does include the closing quote. */
2534 for (int i = 0; i <= 4; i++)
2535 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2536 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2537 for (int i = 6; i <= 10; i++)
2538 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2540 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2543 /* Lex a string literal containing an octal-escaped character.
2544 Verify the substring location data after running cpp_interpret_string
2545 on it. */
2547 static void
2548 test_lexer_string_locations_oct (const line_table_case &case_)
2550 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2551 and with a space in place of digit 6, to terminate the escaped
2552 octal code.
2553 ....................000000000.111111.11112222.2222223333333333444
2554 ....................123456789.012345.67890123.4567890123456789012 */
2555 const char *content = " \"01234\\065 789\" /* not a string */\n";
2556 lexer_test test (case_, content, NULL);
2558 /* Verify that we get the expected token back, with the correct
2559 location information. */
2560 const cpp_token *tok = test.get_token ();
2561 ASSERT_EQ (tok->type, CPP_STRING);
2562 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2564 /* Verify that cpp_interpret_string works. */
2565 cpp_string dst_string;
2566 const enum cpp_ttype type = CPP_STRING;
2567 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2568 &dst_string, type);
2569 ASSERT_TRUE (result);
2570 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2571 free (const_cast <unsigned char *> (dst_string.text));
2573 /* Verify ranges of individual characters. This no longer includes the
2574 opening quote, but does include the closing quote. */
2575 for (int i = 0; i < 5; i++)
2576 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2577 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2578 for (int i = 6; i <= 10; i++)
2579 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2581 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2584 /* Test of string literal containing letter escapes. */
2586 static void
2587 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2589 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2590 .....................000000000.1.11111.1.1.11222.22222223333333
2591 .....................123456789.0.12345.6.7.89012.34567890123456. */
2592 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2593 lexer_test test (case_, content, NULL);
2595 /* Verify that we get the expected tokens back. */
2596 const cpp_token *tok = test.get_token ();
2597 ASSERT_EQ (tok->type, CPP_STRING);
2598 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2600 /* Verify ranges of individual characters. */
2601 /* "\t". */
2602 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2603 0, 1, 10, 11);
2604 /* "foo". */
2605 for (int i = 1; i <= 3; i++)
2606 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2607 i, 1, 11 + i, 11 + i);
2608 /* "\\" and "\n". */
2609 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2610 4, 1, 15, 16);
2611 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2612 5, 1, 17, 18);
2614 /* "bar" and closing quote for nul-terminator. */
2615 for (int i = 6; i <= 9; i++)
2616 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2617 i, 1, 13 + i, 13 + i);
2619 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2622 /* Another test of a string literal containing a letter escape.
2623 Based on string seen in
2624 printf ("%-%\n");
2625 in gcc.dg/format/c90-printf-1.c. */
2627 static void
2628 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2630 /* .....................000000000.1111.11.1111.22222222223.
2631 .....................123456789.0123.45.6789.01234567890. */
2632 const char *content = (" \"%-%\\n\" /* non-str */\n");
2633 lexer_test test (case_, content, NULL);
2635 /* Verify that we get the expected tokens back. */
2636 const cpp_token *tok = test.get_token ();
2637 ASSERT_EQ (tok->type, CPP_STRING);
2638 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2640 /* Verify ranges of individual characters. */
2641 /* "%-%". */
2642 for (int i = 0; i < 3; i++)
2643 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2644 i, 1, 10 + i, 10 + i);
2645 /* "\n". */
2646 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2647 3, 1, 13, 14);
2649 /* Closing quote for nul-terminator. */
2650 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2651 4, 1, 15, 15);
2653 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2656 /* Lex a string literal containing UCN 4 characters.
2657 Verify the substring location data after running cpp_interpret_string
2658 on it. */
2660 static void
2661 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2663 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2664 as UCN 4.
2665 ....................000000000.111111.111122.222222223.33333333344444
2666 ....................123456789.012345.678901.234567890.12345678901234 */
2667 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2668 lexer_test test (case_, content, NULL);
2670 /* Verify that we get the expected token back, with the correct
2671 location information. */
2672 const cpp_token *tok = test.get_token ();
2673 ASSERT_EQ (tok->type, CPP_STRING);
2674 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2676 /* Verify that cpp_interpret_string works.
2677 The string should be encoded in the execution character
2678 set. Assuming that that is UTF-8, we should have the following:
2679 ----------- ---- ----- ------- ----------------
2680 Byte offset Byte Octal Unicode Source Column(s)
2681 ----------- ---- ----- ------- ----------------
2682 0 0x30 '0' 10
2683 1 0x31 '1' 11
2684 2 0x32 '2' 12
2685 3 0x33 '3' 13
2686 4 0x34 '4' 14
2687 5 0xE2 \342 U+2174 15-20
2688 6 0x85 \205 (cont) 15-20
2689 7 0xB4 \264 (cont) 15-20
2690 8 0xE2 \342 U+2175 21-26
2691 9 0x85 \205 (cont) 21-26
2692 10 0xB5 \265 (cont) 21-26
2693 11 0x37 '7' 27
2694 12 0x38 '8' 28
2695 13 0x39 '9' 29
2696 14 0x00 30 (closing quote)
2697 ----------- ---- ----- ------- ---------------. */
2699 cpp_string dst_string;
2700 const enum cpp_ttype type = CPP_STRING;
2701 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2702 &dst_string, type);
2703 ASSERT_TRUE (result);
2704 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2705 (const char *)dst_string.text);
2706 free (const_cast <unsigned char *> (dst_string.text));
2708 /* Verify ranges of individual characters. This no longer includes the
2709 opening quote, but does include the closing quote.
2710 '01234'. */
2711 for (int i = 0; i <= 4; i++)
2712 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2713 /* U+2174. */
2714 for (int i = 5; i <= 7; i++)
2715 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2716 /* U+2175. */
2717 for (int i = 8; i <= 10; i++)
2718 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2719 /* '789' and nul terminator */
2720 for (int i = 11; i <= 14; i++)
2721 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2723 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2726 /* Lex a string literal containing UCN 8 characters.
2727 Verify the substring location data after running cpp_interpret_string
2728 on it. */
2730 static void
2731 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2733 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2734 ....................000000000.111111.1111222222.2222333333333.344444
2735 ....................123456789.012345.6789012345.6789012345678.901234 */
2736 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2737 lexer_test test (case_, content, NULL);
2739 /* Verify that we get the expected token back, with the correct
2740 location information. */
2741 const cpp_token *tok = test.get_token ();
2742 ASSERT_EQ (tok->type, CPP_STRING);
2743 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2744 "\"01234\\U00002174\\U00002175789\"");
2746 /* Verify that cpp_interpret_string works.
2747 The UTF-8 encoding of the string is identical to that from
2748 the ucn4 testcase above; the only difference is the column
2749 locations. */
2750 cpp_string dst_string;
2751 const enum cpp_ttype type = CPP_STRING;
2752 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2753 &dst_string, type);
2754 ASSERT_TRUE (result);
2755 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2756 (const char *)dst_string.text);
2757 free (const_cast <unsigned char *> (dst_string.text));
2759 /* Verify ranges of individual characters. This no longer includes the
2760 opening quote, but does include the closing quote.
2761 '01234'. */
2762 for (int i = 0; i <= 4; i++)
2763 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2764 /* U+2174. */
2765 for (int i = 5; i <= 7; i++)
2766 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2767 /* U+2175. */
2768 for (int i = 8; i <= 10; i++)
2769 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2770 /* '789' at columns 35-37 */
2771 for (int i = 11; i <= 13; i++)
2772 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2773 /* Closing quote/nul-terminator at column 38. */
2774 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2776 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
/* Read the four bytes at PTR_BE_VALUE as a big-endian 32-bit quantity
   and return the value in host byte order.  The bytes are accessed
   individually, most significant first.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t value = 0;

  /* Fold in one byte at a time, shifting the earlier (more
     significant) bytes up to make room.  */
  for (int i = 0; i < 4; i++)
    value = (value << 8) | bytes[i];

  return value;
}
2791 /* Lex a wide string literal and verify that attempts to read substring
2792 location data from it fail gracefully. */
2794 static void
2795 test_lexer_string_locations_wide_string (const line_table_case &case_)
2797 /* Digits 0-9.
2798 ....................000000000.11111111112.22222222233333
2799 ....................123456789.01234567890.12345678901234 */
2800 const char *content = " L\"0123456789\" /* non-str */\n";
2801 lexer_test test (case_, content, NULL);
2803 /* Verify that we get the expected token back, with the correct
2804 location information. */
2805 const cpp_token *tok = test.get_token ();
2806 ASSERT_EQ (tok->type, CPP_WSTRING);
2807 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2809 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2810 cpp_string dst_string;
2811 const enum cpp_ttype type = CPP_WSTRING;
2812 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2813 &dst_string, type);
2814 ASSERT_TRUE (result);
2815 /* The cpp_reader defaults to big-endian with
2816 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2817 now be encoded as UTF-32BE. */
2818 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2819 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2820 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2821 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2822 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2823 free (const_cast <unsigned char *> (dst_string.text));
2825 /* We don't yet support generating substring location information
2826 for L"" strings. */
2827 ASSERT_HAS_NO_SUBSTRING_RANGES
2828 (test, tok->src_loc, type,
2829 "execution character set != source character set");
/* Read the two bytes at PTR_BE_VALUE as a big-endian 16-bit quantity
   and return the value in host byte order.  The bytes are accessed
   individually, most significant first.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint16_t high = bytes[0];
  uint16_t low = bytes[1];
  return (uint16_t) ((high << 8) | low);
}
2841 /* Lex a u"" string literal and verify that attempts to read substring
2842 location data from it fail gracefully.
CASE_ is the line_table configuration under test; it is handed to the
lexer_test fixture that lexes CONTENT. */
2844 static void
2845 test_lexer_string_locations_string16 (const line_table_case &case_)
2847 /* Digits 0-9.
2848 ....................000000000.11111111112.22222222233333
2849 ....................123456789.01234567890.12345678901234 */
2850 const char *content = " u\"0123456789\" /* non-str */\n";
2851 lexer_test test (case_, content, NULL);
2853 /* Verify that we get the expected token back, with the correct
2854 location information. */
2855 const cpp_token *tok = test.get_token ();
2856 ASSERT_EQ (tok->type, CPP_STRING16);
2857 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2859 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2860 cpp_string dst_string;
2861 const enum cpp_ttype type = CPP_STRING16;
2862 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2863 &dst_string, type);
2864 ASSERT_TRUE (result);
2866 /* The cpp_reader defaults to big-endian, so dst_string should
2867 now be encoded as UTF-16BE. Spot-check the first, a middle and the
last digit, then the zero code unit that follows the ten digits. */
2868 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2869 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2870 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2871 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2872 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
/* Release the buffer that cpp_interpret_string stored in dst_string. */
2873 free (const_cast <unsigned char *> (dst_string.text));
2875 /* We don't yet support generating substring location information
2876 for u"" strings. */
2877 ASSERT_HAS_NO_SUBSTRING_RANGES
2878 (test, tok->src_loc, type,
2879 "execution character set != source character set");
2882 /* Lex a U"" string literal and verify that attempts to read substring
2883 location data from it fail gracefully.
CASE_ is the line_table configuration under test; it is handed to the
lexer_test fixture that lexes CONTENT. */
2885 static void
2886 test_lexer_string_locations_string32 (const line_table_case &case_)
2888 /* Digits 0-9.
2889 ....................000000000.11111111112.22222222233333
2890 ....................123456789.01234567890.12345678901234 */
2891 const char *content = " U\"0123456789\" /* non-str */\n";
2892 lexer_test test (case_, content, NULL);
2894 /* Verify that we get the expected token back, with the correct
2895 location information. */
2896 const cpp_token *tok = test.get_token ();
2897 ASSERT_EQ (tok->type, CPP_STRING32);
2898 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2900 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2901 cpp_string dst_string;
2902 const enum cpp_ttype type = CPP_STRING32;
2903 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2904 &dst_string, type);
2905 ASSERT_TRUE (result);
2907 /* The cpp_reader defaults to big-endian, so dst_string should
2908 now be encoded as UTF-32BE. Spot-check the first, a middle and the
last digit, then the zero code unit that follows the ten digits. */
2909 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2910 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2911 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2912 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2913 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
/* Release the buffer that cpp_interpret_string stored in dst_string. */
2914 free (const_cast <unsigned char *> (dst_string.text));
2916 /* We don't yet support generating substring location information
2917 for U"" strings. */
2918 ASSERT_HAS_NO_SUBSTRING_RANGES
2919 (test, tok->src_loc, type,
2920 "execution character set != source character set");
2923 /* Lex a u8-string literal.
2924 Verify the substring location data after running cpp_interpret_string
2925 on it.
CASE_ is the line_table configuration under test; it is handed to the
lexer_test fixture that lexes CONTENT. */
2927 static void
2928 test_lexer_string_locations_u8 (const line_table_case &case_)
2930 /* Digits 0-9.
2931 ....................000000000.11111111112.22222222233333
2932 ....................123456789.01234567890.12345678901234 */
2933 const char *content = " u8\"0123456789\" /* non-str */\n";
2934 lexer_test test (case_, content, NULL);
2936 /* Verify that we get the expected token back, with the correct
2937 location information. */
2938 const cpp_token *tok = test.get_token ();
2939 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2940 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2942 /* Verify that cpp_interpret_string works.
NOTE(review): the interpretation and the range checks below use
CPP_STRING, not the token's own CPP_UTF8STRING type -- confirm that
this is intentional. */
2943 cpp_string dst_string;
2944 const enum cpp_ttype type = CPP_STRING;
2945 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2946 &dst_string, type);
2947 ASSERT_TRUE (result);
2948 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
/* Release the buffer that cpp_interpret_string stored in dst_string. */
2949 free (const_cast <unsigned char *> (dst_string.text));
2951 /* Verify ranges of individual characters. This no longer includes the
2952 opening quote, but does include the closing quote.
Indices 0-9 are the ten digits and index 10 is the closing quote, each
expected to be one column wide on line 1.
NOTE(review): the asserted columns (10 + i) imply more leading
whitespace in CONTENT than this copy of the literal shows -- confirm
the literal's spacing. */
2953 for (int i = 0; i <= 10; i++)
2954 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2957 /* Lex a string literal containing UTF-8 source characters.
2958 Verify the substring location data after running cpp_interpret_string
2959 on it.
CASE_ is the line_table configuration under test; it is handed to the
lexer_test fixture that lexes CONTENT. */
2961 static void
2962 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2964 /* This string literal is written out to the source file as UTF-8,
2965 and is of the form "before mojibake after", where "mojibake"
2966 is written as the following four unicode code points:
2967 U+6587 CJK UNIFIED IDEOGRAPH-6587
2968 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2969 U+5316 CJK UNIFIED IDEOGRAPH-5316
2970 U+3051 HIRAGANA LETTER KE.
2971 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2972 "before" and "after" are 1 byte per unicode character.
2974 The numbering shown are "columns", which are *byte* numbers within
2975 the line, rather than unicode character numbers.
2977 .................... 000000000.1111111.
2978 .................... 123456789.0123456. */
2979 const char *content = (" \"before "
2980 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2981 UTF-8: 0xE6 0x96 0x87
2982 C octal escaped UTF-8: \346\226\207
2983 "column" numbers: 17-19. */
2984 "\346\226\207"
2986 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2987 UTF-8: 0xE5 0xAD 0x97
2988 C octal escaped UTF-8: \345\255\227
2989 "column" numbers: 20-22. */
2990 "\345\255\227"
2992 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2993 UTF-8: 0xE5 0x8C 0x96
2994 C octal escaped UTF-8: \345\214\226
2995 "column" numbers: 23-25. */
2996 "\345\214\226"
2998 /* U+3051 HIRAGANA LETTER KE
2999 UTF-8: 0xE3 0x81 0x91
3000 C octal escaped UTF-8: \343\201\221
3001 "column" numbers: 26-28. */
3002 "\343\201\221"
3004 /* column numbers 29 onwards
3005 2333333.33334444444444
3006 9012345.67890123456789. */
3007 " after\" /* non-str */\n");
3008 lexer_test test (case_, content, NULL);
3010 /* Verify that we get the expected token back, with the correct
3011 location information. */
3012 const cpp_token *tok = test.get_token ();
3013 ASSERT_EQ (tok->type, CPP_STRING);
3014 ASSERT_TOKEN_AS_TEXT_EQ
3015 (test.m_parser, tok,
3016 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3018 /* Verify that cpp_interpret_string works. */
3019 cpp_string dst_string;
3020 const enum cpp_ttype type = CPP_STRING;
3021 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3022 &dst_string, type);
3023 ASSERT_TRUE (result);
3024 ASSERT_STREQ
3025 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3026 (const char *)dst_string.text);
/* Release the buffer that cpp_interpret_string stored in dst_string. */
3027 free (const_cast <unsigned char *> (dst_string.text));
3029 /* Verify ranges of individual characters. This no longer includes the
3030 opening quote, but does include the closing quote.
3031 Assuming that both source and execution encodings are UTF-8, we have
3032 a run of 25 octets in each, plus the NUL terminator.
The 25 octets are 7 for "before ", 4 * 3 for the multibyte characters
and 6 for " after".
NOTE(review): the asserted columns (10 + i) imply more leading
whitespace in CONTENT than this copy of the literal shows -- confirm
the literal's spacing. */
3033 for (int i = 0; i < 25; i++)
3034 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3035 /* NUL-terminator should use the closing quote at column 35. */
3036 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3038 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3041 /* Test of string literal concatenation.
Two adjacent string tokens on consecutive lines are interpreted as one
string, the concatenation is recorded, and the per-character ranges are
then queried through the location of the first token.
CASE_ is the line_table configuration under test. */
3043 static void
3044 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3046 /* Digits 0-9.
3047 .....................000000000.111111.11112222222222
3048 .....................123456789.012345.67890123456789. */
3049 const char *content = (" \"01234\" /* non-str */\n"
3050 " \"56789\" /* non-str */\n");
3051 lexer_test test (case_, content, NULL);
3053 location_t input_locs[2];
3055 /* Verify that we get the expected tokens back. */
3056 auto_vec <cpp_string> input_strings;
3057 const cpp_token *tok_a = test.get_token ();
3058 ASSERT_EQ (tok_a->type, CPP_STRING);
3059 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3060 input_strings.safe_push (tok_a->val.str);
3061 input_locs[0] = tok_a->src_loc;
3063 const cpp_token *tok_b = test.get_token ();
3064 ASSERT_EQ (tok_b->type, CPP_STRING);
3065 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3066 input_strings.safe_push (tok_b->val.str);
3067 input_locs[1] = tok_b->src_loc;
3069 /* Verify that cpp_interpret_string works. */
3070 cpp_string dst_string;
3071 const enum cpp_ttype type = CPP_STRING;
3072 bool result = cpp_interpret_string (test.m_parser,
3073 input_strings.address (), 2,
3074 &dst_string, type);
3075 ASSERT_TRUE (result);
3076 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
/* Release the buffer that cpp_interpret_string stored in dst_string. */
3077 free (const_cast <unsigned char *> (dst_string.text));
3079 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3080 test.m_concats.record_string_concatenation (2, input_locs);
/* All later queries go through the location of the first token. */
3082 location_t initial_loc = input_locs[0];
/* NOTE(review): the asserted columns (10 + i, 5 + i) imply more leading
whitespace in CONTENT than this copy of the literal shows -- confirm
the literal's spacing. */
3084 /* "01234" on line 1. */
3085 for (int i = 0; i <= 4; i++)
3086 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3087 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3088 for (int i = 5; i <= 10; i++)
3089 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3091 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3094 /* Another test of string literal concatenation.
Five two-character string tokens, one per source line, are interpreted
as one string; the per-character ranges are then queried through the
location of the first token.
CASE_ is the line_table configuration under test. */
3096 static void
3097 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3099 /* Digits 0-9.
3100 .....................000000000.111.11111112222222
3101 .....................123456789.012.34567890123456. */
3102 const char *content = (" \"01\" /* non-str */\n"
3103 " \"23\" /* non-str */\n"
3104 " \"45\" /* non-str */\n"
3105 " \"67\" /* non-str */\n"
3106 " \"89\" /* non-str */\n");
3107 lexer_test test (case_, content, NULL);
3109 auto_vec <cpp_string> input_strings;
3110 location_t input_locs[5];
3112 /* Verify that we get the expected tokens back. */
3113 for (int i = 0; i < 5; i++)
3115 const cpp_token *tok = test.get_token ();
3116 ASSERT_EQ (tok->type, CPP_STRING);
3117 input_strings.safe_push (tok->val.str);
3118 input_locs[i] = tok->src_loc;
3121 /* Verify that cpp_interpret_string works. */
3122 cpp_string dst_string;
3123 const enum cpp_ttype type = CPP_STRING;
3124 bool result = cpp_interpret_string (test.m_parser,
3125 input_strings.address (), 5,
3126 &dst_string, type);
3127 ASSERT_TRUE (result);
3128 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
/* Release the buffer that cpp_interpret_string stored in dst_string. */
3129 free (const_cast <unsigned char *> (dst_string.text));
3131 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3132 test.m_concats.record_string_concatenation (5, input_locs);
/* All later queries go through the location of the first token. */
3134 location_t initial_loc = input_locs[0];
3136 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3137 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3138 and expect get_source_range_for_substring to fail.
3139 However, for a string concatenation test, we can have a case
3140 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3141 but subsequent strings can be after it.
3142 Attempting to detect this within assert_char_at_range
3143 would overcomplicate the logic for the common test cases, so
3144 we detect it here. */
3145 if (should_have_column_data_p (input_locs[0])
3146 && !should_have_column_data_p (input_locs[4]))
3148 /* Verify that get_source_range_for_substring gracefully rejects
3149 this case. */
3150 source_range actual_range;
3151 const char *err
3152 = get_source_range_for_char (test.m_parser, &test.m_concats,
3153 initial_loc, type, 0, &actual_range);
3154 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
/* Nothing further can be checked in this configuration. */
3155 return;
/* Character (i * 2) + j is the j-th digit of the i-th string, expected on
line i + 1 at column 10 + j.
NOTE(review): that column implies more leading whitespace in CONTENT
than this copy of the literal shows -- confirm the literal's spacing. */
3158 for (int i = 0; i < 5; i++)
3159 for (int j = 0; j < 2; j++)
3160 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3161 i + 1, 10 + j, 10 + j);
3163 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3164 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3166 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3169 /* Another test of string literal concatenation, this time combined with
3170 various kinds of escaped characters.
Four string tokens on one line are interpreted as one string; two of
them consist of a single escape sequence each.
CASE_ is the line_table configuration under test. */
3172 static void
3173 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3175 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3176 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3177 const char *content
3178 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3179 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3180 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3181 lexer_test test (case_, content, NULL);
3183 auto_vec <cpp_string> input_strings;
3184 location_t input_locs[4];
3186 /* Verify that we get the expected tokens back. */
3187 for (int i = 0; i < 4; i++)
3189 const cpp_token *tok = test.get_token ();
3190 ASSERT_EQ (tok->type, CPP_STRING);
3191 input_strings.safe_push (tok->val.str);
3192 input_locs[i] = tok->src_loc;
3195 /* Verify that cpp_interpret_string works. */
3196 cpp_string dst_string;
3197 const enum cpp_ttype type = CPP_STRING;
3198 bool result = cpp_interpret_string (test.m_parser,
3199 input_strings.address (), 4,
3200 &dst_string, type);
3201 ASSERT_TRUE (result);
3202 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
/* Release the buffer that cpp_interpret_string stored in dst_string. */
3203 free (const_cast <unsigned char *> (dst_string.text));
3205 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3206 test.m_concats.record_string_concatenation (4, input_locs);
/* All later queries go through the location of the first token. */
3208 location_t initial_loc = input_locs[0];
/* Digits 0-4 and 7-9 are one column wide each; digits 5 and 6 come from
four-character escape sequences and so span four columns each.
NOTE(review): the asserted columns imply more whitespace in CONTENT
than this copy of the literal shows -- confirm the literal's spacing. */
3210 for (int i = 0; i <= 4; i++)
3211 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3212 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3213 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3214 for (int i = 7; i <= 9; i++)
3215 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3217 /* NUL-terminator should use the location of the final closing quote. */
3218 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3220 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3223 /* Test of string literal in a macro.
The string token produced by expanding MACRO is expected to report
character ranges inside the #define line (line 1), not at the point of
use on line 2.
CASE_ is the line_table configuration under test. */
3225 static void
3226 test_lexer_string_locations_macro (const line_table_case &case_)
3228 /* Digits 0-9.
3229 .....................0000000001111111111.22222222223.
3230 .....................1234567890123456789.01234567890. */
3231 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3232 " MACRO");
3233 lexer_test test (case_, content, NULL);
3235 /* Verify that we get the expected tokens back.
The string token is expected to be preceded and followed by a
CPP_PADDING token. */
3236 const cpp_token *tok = test.get_token ();
3237 ASSERT_EQ (tok->type, CPP_PADDING);
3239 tok = test.get_token ();
3240 ASSERT_EQ (tok->type, CPP_STRING);
3241 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3243 /* Verify ranges of individual characters. We ought to
3244 see columns within the macro definition.
NOTE(review): the asserted columns (20 + i) imply more whitespace in
the #define line than this copy of the literal shows -- confirm the
literal's spacing. */
3245 for (int i = 0; i <= 10; i++)
3246 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3247 i, 1, 20 + i, 20 + i);
3249 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3251 tok = test.get_token ();
3252 ASSERT_EQ (tok->type, CPP_PADDING);
3255 /* Test of stringification of a macro argument.
MACRO(foo) expands to the string "foo" via the # operator; asking for
substring ranges within that string is expected to fail with an error
message rather than succeed.
CASE_ is the line_table configuration under test. */
3257 static void
3258 test_lexer_string_locations_stringified_macro_argument
3259 (const line_table_case &case_)
3261 /* .....................000000000111111111122222222223.
3262 .....................123456789012345678901234567890. */
3263 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3264 "MACRO(foo)\n");
3265 lexer_test test (case_, content, NULL);
3267 /* Verify that we get the expected token back.
The string token is expected to be preceded by one CPP_PADDING token
and followed by two. */
3268 const cpp_token *tok = test.get_token ();
3269 ASSERT_EQ (tok->type, CPP_PADDING);
3271 tok = test.get_token ();
3272 ASSERT_EQ (tok->type, CPP_STRING);
3273 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3275 /* We don't support getting the location of a stringified macro
3276 argument. Verify that it fails gracefully. */
3277 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3278 "cpp_interpret_string_1 failed");
3280 tok = test.get_token ();
3281 ASSERT_EQ (tok->type, CPP_PADDING);
3283 tok = test.get_token ();
3284 ASSERT_EQ (tok->type, CPP_PADDING);
3287 /* Ensure that we fail gracefully if something attempts to pass
3288 in a location that isn't a string literal token. Seen on this code:
3290 const char a[] = " %d ";
3291 __builtin_printf (a, 0.5);
3294 when c-format.c erroneously used the indicated one-character
3295 location as the format string location, leading to a read past the
3296 end of a string buffer in cpp_interpret_string_1.
CASE_ is the line_table configuration under test. */
3298 static void
3299 test_lexer_string_locations_non_string (const line_table_case &case_)
3301 /* .....................000000000111111111122222222223.
3302 .....................123456789012345678901234567890. */
3303 const char *content = (" a\n");
3304 lexer_test test (case_, content, NULL);
3306 /* Verify that we get the expected token back. */
3307 const cpp_token *tok = test.get_token ();
3308 ASSERT_EQ (tok->type, CPP_NAME);
3309 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3311 /* At this point, libcpp is attempting to interpret the name as a
3312 string literal, despite it not starting with a quote. We don't detect
3313 that, but we should at least fail gracefully.
The location of the CPP_NAME token is deliberately queried as if it
were a CPP_STRING. */
3314 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3315 "cpp_interpret_string_1 failed");
3318 /* Ensure that we can read substring information for a token which
3319 starts in one linemap and ends in another. Adapted from
3320 gcc.dg/cpp/pr69985.c.
CASE_ is the line_table configuration under test. */
3322 static void
3323 test_lexer_string_locations_long_line (const line_table_case &case_)
3325 /* .....................000000.000111111111
3326 .....................123456.789012346789. */
3327 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3328 " \"0123456789012345678901234567890123456789"
3329 "0123456789012345678901234567890123456789"
3330 "0123456789012345678901234567890123456789"
3331 "0123456789\"\n");
3333 lexer_test test (case_, content, NULL);
3335 /* Verify that we get the expected token back. */
3336 const cpp_token *tok = test.get_token ();
3337 ASSERT_EQ (tok->type, CPP_STRING);
/* Skip the range checks when the highest location handed out so far is
one for which column data is not expected. */
3339 if (!should_have_column_data_p (line_table->highest_location))
3340 return;
3342 /* Verify ranges of individual characters.
The literal holds 130 digits (40 + 40 + 40 + 10); the 131st range is
the closing quote.
NOTE(review): the asserted columns (7 + i) imply more leading
whitespace on line 2 of CONTENT than this copy of the literal shows --
confirm the literal's spacing. */
3343 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3344 for (int i = 0; i < 131; i++)
3345 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3346 i, 2, 7 + i, 7 + i);
3349 /* Test of locations within a raw string that doesn't contain a newline.
CASE_ is the line_table configuration under test. */
3351 static void
3352 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3354 /* .....................00.0000000111111111122.
3355 .....................12.3456789012345678901. */
3356 const char *content = ("R\"foo(0123456789)foo\"\n");
3357 lexer_test test (case_, content, NULL);
3359 /* Verify that we get the expected token back. */
3360 const cpp_token *tok = test.get_token ();
3361 ASSERT_EQ (tok->type, CPP_STRING);
3363 /* Verify that cpp_interpret_string works.
The R"foo( prefix and )foo" suffix are not part of the interpreted
string; only the ten digits are. */
3364 cpp_string dst_string;
3365 const enum cpp_ttype type = CPP_STRING;
3366 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3367 &dst_string, type);
3368 ASSERT_TRUE (result);
3369 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
/* Release the buffer that cpp_interpret_string stored in dst_string. */
3370 free (const_cast <unsigned char *> (dst_string.text));
/* Skip the range checks when the highest location handed out so far is
one for which column data is not expected. */
3372 if (!should_have_column_data_p (line_table->highest_location))
3373 return;
3375 /* 0-9, plus the NUL terminator.
The first digit follows the six-character prefix R"foo( and so sits at
column 7. */
3376 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3377 for (int i = 0; i < 11; i++)
3378 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3379 i, 1, 7 + i, 7 + i);
3382 /* Test of locations within a raw string that contains a newline.
The string is expected to be interpreted correctly, but requests for
substring ranges within it are expected to be rejected with an error
message.
CASE_ is the line_table configuration under test. */
3384 static void
3385 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3387 /* .....................00.0000.
3388 .....................12.3456. */
3389 const char *content = ("R\"foo(\n"
3390 /* .....................00000.
3391 .....................12345. */
3392 "hello\n"
3393 "world\n"
3394 /* .....................00000.
3395 .....................12345. */
3396 ")foo\"\n");
3397 lexer_test test (case_, content, NULL);
3399 /* Verify that we get the expected token back. */
3400 const cpp_token *tok = test.get_token ();
3401 ASSERT_EQ (tok->type, CPP_STRING);
3403 /* Verify that cpp_interpret_string works.
The newlines between the delimiters are part of the interpreted
string. */
3404 cpp_string dst_string;
3405 const enum cpp_ttype type = CPP_STRING;
3406 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3407 &dst_string, type);
3408 ASSERT_TRUE (result);
3409 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
/* Release the buffer that cpp_interpret_string stored in dst_string. */
3410 free (const_cast <unsigned char *> (dst_string.text));
/* Skip the range check when the highest location handed out so far is
one for which column data is not expected. */
3412 if (!should_have_column_data_p (line_table->highest_location))
3413 return;
3415 /* Currently we don't support locations within raw strings that
3416 contain newlines. */
3417 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3418 "range endpoints are on different lines");
3421 /* Test of parsing an unterminated raw string. */
3423 static void
3424 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3426 const char *content = "R\"ouch()ouCh\" /* etc */";
3428 lexer_diagnostic_sink diagnostics;
3429 lexer_test test (case_, content, &diagnostics);
3430 test.m_implicitly_expect_EOF = false;
3432 /* Attempt to parse the raw string. */
3433 const cpp_token *tok = test.get_token ();
3434 ASSERT_EQ (tok->type, CPP_EOF);
3436 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3437 /* We expect the message "unterminated raw string"
3438 in the "cpplib" translation domain.
3439 It's not clear that dgettext is available on all supported hosts,
3440 so this assertion is commented-out for now.
3441 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3442 diagnostics.m_diagnostics[0]);
3446 /* Test of lexing char constants.
Five constants are lexed, one per source line: a plain, a u-prefixed,
a U-prefixed and an L-prefixed 'a', followed by the multi-character
constant 'abc'. Each token's type and spelling is checked; the plain
'a' is additionally run through cpp_interpret_charconst.
CASE_ is the line_table configuration under test. */
3448 static void
3449 test_lexer_char_constants (const line_table_case &case_)
3451 /* Various char constants.
3452 .....................0000000001111111111.22222222223.
3453 .....................1234567890123456789.01234567890. */
3454 const char *content = (" 'a'\n"
3455 " u'a'\n"
3456 " U'a'\n"
3457 " L'a'\n"
3458 " 'abc'\n");
3459 lexer_test test (case_, content, NULL);
3461 /* Verify that we get the expected tokens back. */
3462 /* 'a'. */
3463 const cpp_token *tok = test.get_token ();
3464 ASSERT_EQ (tok->type, CPP_CHAR);
3465 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
/* chars_seen and unsignedp are passed to cpp_interpret_charconst by
address; only the returned value and chars_seen are checked here. */
3467 unsigned int chars_seen;
3468 int unsignedp;
3469 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3470 &chars_seen, &unsignedp);
3471 ASSERT_EQ (cc, 'a');
3472 ASSERT_EQ (chars_seen, 1);
3474 /* u'a'. */
3475 tok = test.get_token ();
3476 ASSERT_EQ (tok->type, CPP_CHAR16);
3477 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3479 /* U'a'. */
3480 tok = test.get_token ();
3481 ASSERT_EQ (tok->type, CPP_CHAR32);
3482 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3484 /* L'a'. */
3485 tok = test.get_token ();
3486 ASSERT_EQ (tok->type, CPP_WCHAR);
3487 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3489 /* 'abc' (c-char-sequence).
It is lexed as a single CPP_CHAR token; its value is not interpreted
here. */
3490 tok = test.get_token ();
3491 ASSERT_EQ (tok->type, CPP_CHAR);
3492 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3494 /* A table of interesting location_t values, giving one axis of our test
3495 matrix. */
3497 static const location_t boundary_locations[] = {
3498 /* Zero means "don't override the default values for a new line_table". */
3501 /* An arbitrary non-zero value that isn't close to one of
3502 the boundary values below. */
3503 0x10000,
3505 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3506 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3507 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3508 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3509 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3510 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3512 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3513 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3514 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3515 LINE_MAP_MAX_LOCATION_WITH_COLS,
3516 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3517 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3520 /* Run TESTCASE multiple times, once for each case in our test matrix. */
3522 void
3523 for_each_line_table_case (void (*testcase) (const line_table_case &))
3525 /* As noted above in the description of struct line_table_case,
3526 we want to explore a test matrix of interesting line_table
3527 situations, running various selftests for each case within the
3528 matrix. */
3530 /* Run all tests with:
3531 (a) line_table->default_range_bits == 0, and
3532 (b) line_table->default_range_bits == 5. */
3533 int num_cases_tested = 0;
3534 for (int default_range_bits = 0; default_range_bits <= 5;
3535 default_range_bits += 5)
3537 /* ...and use each of the "interesting" location values as
3538 the starting location within line_table. */
3539 const int num_boundary_locations
3540 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3541 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3543 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3545 testcase (c);
3547 num_cases_tested++;
3551 /* Verify that we fully covered the test matrix. */
3552 ASSERT_EQ (num_cases_tested, 2 * 12);
3555 /* Run all of the selftests within this file. */
3557 void
3558 input_c_tests ()
3560 test_linenum_comparisons ();
3561 test_should_have_column_data_p ();
3562 test_unknown_location ();
3563 test_builtins ();
3564 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3566 for_each_line_table_case (test_accessing_ordinary_linemaps);
3567 for_each_line_table_case (test_lexer);
3568 for_each_line_table_case (test_lexer_string_locations_simple);
3569 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3570 for_each_line_table_case (test_lexer_string_locations_hex);
3571 for_each_line_table_case (test_lexer_string_locations_oct);
3572 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3573 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3574 for_each_line_table_case (test_lexer_string_locations_ucn4);
3575 for_each_line_table_case (test_lexer_string_locations_ucn8);
3576 for_each_line_table_case (test_lexer_string_locations_wide_string);
3577 for_each_line_table_case (test_lexer_string_locations_string16);
3578 for_each_line_table_case (test_lexer_string_locations_string32);
3579 for_each_line_table_case (test_lexer_string_locations_u8);
3580 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3581 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3582 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3583 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3584 for_each_line_table_case (test_lexer_string_locations_macro);
3585 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3586 for_each_line_table_case (test_lexer_string_locations_non_string);
3587 for_each_line_table_case (test_lexer_string_locations_long_line);
3588 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3589 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3590 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3591 for_each_line_table_case (test_lexer_char_constants);
3593 test_reading_source_line ();
3596 } // namespace selftest
3598 #endif /* CHECKING_P */