Fix compilation failure with C++98 compilers
[official-gcc.git] / gcc / input.c
blobeeeb11ecc0ab262f480c3dd54e85eb0105a190b3
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2018 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic-core.h"
25 #include "selftest.h"
26 #include "cpplib.h"
28 #ifndef HAVE_ICONV
29 #define HAVE_ICONV 0
30 #endif
32 /* This is a cache used by get_next_line to store the content of a
33 file to be searched for file lines. */
34 struct fcache
36 /* These are information used to store a line boundary. */
37 struct line_info
39 /* The line number. It starts from 1. */
40 size_t line_num;
42 /* The position (byte count) of the beginning of the line,
43 relative to the file data pointer. This starts at zero. */
44 size_t start_pos;
46 /* The position (byte count) of the last byte of the line. This
47 normally points to the '\n' character, or to one byte after the
48 last byte of the file, if the file doesn't contain a '\n'
49 character. */
50 size_t end_pos;
52 line_info (size_t l, size_t s, size_t e)
53 : line_num (l), start_pos (s), end_pos (e)
56 line_info ()
57 :line_num (0), start_pos (0), end_pos (0)
61 /* The number of time this file has been accessed. This is used
62 to designate which file cache to evict from the cache
63 array. */
64 unsigned use_count;
66 /* The file_path is the key for identifying a particular file in
67 the cache.
68 For libcpp-using code, the underlying buffer for this field is
69 owned by the corresponding _cpp_file within the cpp_reader. */
70 const char *file_path;
72 FILE *fp;
74 /* This points to the content of the file that we've read so
75 far. */
76 char *data;
78 /* The size of the DATA array above.*/
79 size_t size;
81 /* The number of bytes read from the underlying file so far. This
82 must be less (or equal) than SIZE above. */
83 size_t nb_read;
85 /* The index of the beginning of the current line. */
86 size_t line_start_idx;
88 /* The number of the previous line read. This starts at 1. Zero
89 means we've read no line so far. */
90 size_t line_num;
92 /* This is the total number of lines of the current file. At the
93 moment, we try to get this information from the line map
94 subsystem. Note that this is just a hint. When using the C++
95 front-end, this hint is correct because the input file is then
96 completely tokenized before parsing starts; so the line map knows
97 the number of lines before compilation really starts. For e.g,
98 the C front-end, it can happen that we start emitting diagnostics
99 before the line map has seen the end of the file. */
100 size_t total_lines;
102 /* Could this file be missing a trailing newline on its final line?
103 Initially true (to cope with empty files), set to true/false
104 as each line is read. */
105 bool missing_trailing_newline;
107 /* This is a record of the beginning and end of the lines we've seen
108 while reading the file. This is useful to avoid walking the data
109 from the beginning when we are asked to read a line that is
110 before LINE_START_IDX above. Note that the maximum size of this
111 record is fcache_line_record_size, so that the memory consumption
112 doesn't explode. We thus scale total_lines down to
113 fcache_line_record_size. */
114 vec<line_info, va_heap> line_record;
116 fcache ();
117 ~fcache ();
120 /* Current position in real source file. */
122 location_t input_location = UNKNOWN_LOCATION;
124 struct line_maps *line_table;
126 /* A stashed copy of "line_table" for use by selftest::line_table_test.
127 This needs to be a global so that it can be a GC root, and thus
128 prevent the stashed copy from being garbage-collected if the GC runs
129 during a line_table_test. */
131 struct line_maps *saved_line_table;
133 static fcache *fcache_tab;
134 static const size_t fcache_tab_size = 16;
135 static const size_t fcache_buffer_size = 4 * 1024;
136 static const size_t fcache_line_record_size = 100;
138 /* Expand the source location LOC into a human readable location. If
139 LOC resolves to a builtin location, the file name of the readable
140 location is set to the string "<built-in>". If EXPANSION_POINT_P is
141 TRUE and LOC is virtual, then it is resolved to the expansion
142 point of the involved macro. Otherwise, it is resolved to the
143 spelling location of the token.
145 When resolving to the spelling location of the token, if the
146 resulting location is for a built-in location (that is, it has no
147 associated line/column) in the context of a macro expansion, the
148 returned location is the first one (while unwinding the macro
149 location towards its expansion point) that is in real source
150 code.
152 ASPECT controls which part of the location to use. */
154 static expanded_location
155 expand_location_1 (source_location loc,
156 bool expansion_point_p,
157 enum location_aspect aspect)
159 expanded_location xloc;
160 const line_map_ordinary *map;
161 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
162 tree block = NULL;
164 if (IS_ADHOC_LOC (loc))
166 block = LOCATION_BLOCK (loc);
167 loc = LOCATION_LOCUS (loc);
170 memset (&xloc, 0, sizeof (xloc));
172 if (loc >= RESERVED_LOCATION_COUNT)
174 if (!expansion_point_p)
176 /* We want to resolve LOC to its spelling location.
178 But if that spelling location is a reserved location that
179 appears in the context of a macro expansion (like for a
180 location for a built-in token), let's consider the first
181 location (toward the expansion point) that is not reserved;
182 that is, the first location that is in real source code. */
183 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
184 loc, NULL);
185 lrk = LRK_SPELLING_LOCATION;
187 loc = linemap_resolve_location (line_table, loc, lrk, &map);
189 /* loc is now either in an ordinary map, or is a reserved location.
190 If it is a compound location, the caret is in a spelling location,
191 but the start/finish might still be a virtual location.
192 Depending of what the caller asked for, we may need to recurse
193 one level in order to resolve any virtual locations in the
194 end-points. */
195 switch (aspect)
197 default:
198 gcc_unreachable ();
199 /* Fall through. */
200 case LOCATION_ASPECT_CARET:
201 break;
202 case LOCATION_ASPECT_START:
204 source_location start = get_start (loc);
205 if (start != loc)
206 return expand_location_1 (start, expansion_point_p, aspect);
208 break;
209 case LOCATION_ASPECT_FINISH:
211 source_location finish = get_finish (loc);
212 if (finish != loc)
213 return expand_location_1 (finish, expansion_point_p, aspect);
215 break;
217 xloc = linemap_expand_location (line_table, map, loc);
220 xloc.data = block;
221 if (loc <= BUILTINS_LOCATION)
222 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
224 return xloc;
227 /* Initialize the set of cache used for files accessed by caret
228 diagnostic. */
230 static void
231 diagnostic_file_cache_init (void)
233 if (fcache_tab == NULL)
234 fcache_tab = new fcache[fcache_tab_size];
237 /* Free the resources used by the set of cache used for files accessed
238 by caret diagnostic. */
240 void
241 diagnostic_file_cache_fini (void)
243 if (fcache_tab)
245 delete [] (fcache_tab);
246 fcache_tab = NULL;
250 /* Return the total lines number that have been read so far by the
251 line map (in the preprocessor) so far. For languages like C++ that
252 entirely preprocess the input file before starting to parse, this
253 equals the actual number of lines of the file. */
255 static size_t
256 total_lines_num (const char *file_path)
258 size_t r = 0;
259 source_location l = 0;
260 if (linemap_get_file_highest_location (line_table, file_path, &l))
262 gcc_assert (l >= RESERVED_LOCATION_COUNT);
263 expanded_location xloc = expand_location (l);
264 r = xloc.line;
266 return r;
269 /* Lookup the cache used for the content of a given file accessed by
270 caret diagnostic. Return the found cached file, or NULL if no
271 cached file was found. */
273 static fcache*
274 lookup_file_in_cache_tab (const char *file_path)
276 if (file_path == NULL)
277 return NULL;
279 diagnostic_file_cache_init ();
281 /* This will contain the found cached file. */
282 fcache *r = NULL;
283 for (unsigned i = 0; i < fcache_tab_size; ++i)
285 fcache *c = &fcache_tab[i];
286 if (c->file_path && !strcmp (c->file_path, file_path))
288 ++c->use_count;
289 r = c;
293 if (r)
294 ++r->use_count;
296 return r;
299 /* Purge any mention of FILENAME from the cache of files used for
300 printing source code. For use in selftests when working
301 with tempfiles. */
303 void
304 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
306 gcc_assert (file_path);
308 fcache *r = lookup_file_in_cache_tab (file_path);
309 if (!r)
310 /* Not found. */
311 return;
313 r->file_path = NULL;
314 if (r->fp)
315 fclose (r->fp);
316 r->fp = NULL;
317 r->nb_read = 0;
318 r->line_start_idx = 0;
319 r->line_num = 0;
320 r->line_record.truncate (0);
321 r->use_count = 0;
322 r->total_lines = 0;
323 r->missing_trailing_newline = true;
326 /* Return the file cache that has been less used, recently, or the
327 first empty one. If HIGHEST_USE_COUNT is non-null,
328 *HIGHEST_USE_COUNT is set to the highest use count of the entries
329 in the cache table. */
331 static fcache*
332 evicted_cache_tab_entry (unsigned *highest_use_count)
334 diagnostic_file_cache_init ();
336 fcache *to_evict = &fcache_tab[0];
337 unsigned huc = to_evict->use_count;
338 for (unsigned i = 1; i < fcache_tab_size; ++i)
340 fcache *c = &fcache_tab[i];
341 bool c_is_empty = (c->file_path == NULL);
343 if (c->use_count < to_evict->use_count
344 || (to_evict->file_path && c_is_empty))
345 /* We evict C because it's either an entry with a lower use
346 count or one that is empty. */
347 to_evict = c;
349 if (huc < c->use_count)
350 huc = c->use_count;
352 if (c_is_empty)
353 /* We've reached the end of the cache; subsequent elements are
354 all empty. */
355 break;
358 if (highest_use_count)
359 *highest_use_count = huc;
361 return to_evict;
364 /* Create the cache used for the content of a given file to be
365 accessed by caret diagnostic. This cache is added to an array of
366 cache and can be retrieved by lookup_file_in_cache_tab. This
367 function returns the created cache. Note that only the last
368 fcache_tab_size files are cached. */
370 static fcache*
371 add_file_to_cache_tab (const char *file_path)
374 FILE *fp = fopen (file_path, "r");
375 if (fp == NULL)
376 return NULL;
378 unsigned highest_use_count = 0;
379 fcache *r = evicted_cache_tab_entry (&highest_use_count);
380 r->file_path = file_path;
381 if (r->fp)
382 fclose (r->fp);
383 r->fp = fp;
384 r->nb_read = 0;
385 r->line_start_idx = 0;
386 r->line_num = 0;
387 r->line_record.truncate (0);
388 /* Ensure that this cache entry doesn't get evicted next time
389 add_file_to_cache_tab is called. */
390 r->use_count = ++highest_use_count;
391 r->total_lines = total_lines_num (file_path);
392 r->missing_trailing_newline = true;
394 return r;
397 /* Lookup the cache used for the content of a given file accessed by
398 caret diagnostic. If no cached file was found, create a new cache
399 for this file, add it to the array of cached file and return
400 it. */
402 static fcache*
403 lookup_or_add_file_to_cache_tab (const char *file_path)
405 fcache *r = lookup_file_in_cache_tab (file_path);
406 if (r == NULL)
407 r = add_file_to_cache_tab (file_path);
408 return r;
411 /* Default constructor for a cache of file used by caret
412 diagnostic. */
414 fcache::fcache ()
415 : use_count (0), file_path (NULL), fp (NULL), data (0),
416 size (0), nb_read (0), line_start_idx (0), line_num (0),
417 total_lines (0), missing_trailing_newline (true)
419 line_record.create (0);
422 /* Destructor for a cache of file used by caret diagnostic. */
424 fcache::~fcache ()
426 if (fp)
428 fclose (fp);
429 fp = NULL;
431 if (data)
433 XDELETEVEC (data);
434 data = 0;
436 line_record.release ();
439 /* Returns TRUE iff the cache would need to be filled with data coming
440 from the file. That is, either the cache is empty or full or the
441 current line is empty. Note that if the cache is full, it would
442 need to be extended and filled again. */
444 static bool
445 needs_read (fcache *c)
447 return (c->nb_read == 0
448 || c->nb_read == c->size
449 || (c->line_start_idx >= c->nb_read - 1));
452 /* Return TRUE iff the cache is full and thus needs to be
453 extended. */
455 static bool
456 needs_grow (fcache *c)
458 return c->nb_read == c->size;
461 /* Grow the cache if it needs to be extended. */
463 static void
464 maybe_grow (fcache *c)
466 if (!needs_grow (c))
467 return;
469 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
470 c->data = XRESIZEVEC (char, c->data, size);
471 c->size = size;
474 /* Read more data into the cache. Extends the cache if need be.
475 Returns TRUE iff new data could be read. */
477 static bool
478 read_data (fcache *c)
480 if (feof (c->fp) || ferror (c->fp))
481 return false;
483 maybe_grow (c);
485 char * from = c->data + c->nb_read;
486 size_t to_read = c->size - c->nb_read;
487 size_t nb_read = fread (from, 1, to_read, c->fp);
489 if (ferror (c->fp))
490 return false;
492 c->nb_read += nb_read;
493 return !!nb_read;
496 /* Read new data iff the cache needs to be filled with more data
497 coming from the file FP. Return TRUE iff the cache was filled with
498 mode data. */
500 static bool
501 maybe_read_data (fcache *c)
503 if (!needs_read (c))
504 return false;
505 return read_data (c);
508 /* Read a new line from file FP, using C as a cache for the data
509 coming from the file. Upon successful completion, *LINE is set to
510 the beginning of the line found. *LINE points directly in the
511 line cache and is only valid until the next call of get_next_line.
512 *LINE_LEN is set to the length of the line. Note that the line
513 does not contain any terminal delimiter. This function returns
514 true if some data was read or process from the cache, false
515 otherwise. Note that subsequent calls to get_next_line might
516 make the content of *LINE invalid. */
518 static bool
519 get_next_line (fcache *c, char **line, ssize_t *line_len)
521 /* Fill the cache with data to process. */
522 maybe_read_data (c);
524 size_t remaining_size = c->nb_read - c->line_start_idx;
525 if (remaining_size == 0)
526 /* There is no more data to process. */
527 return false;
529 char *line_start = c->data + c->line_start_idx;
531 char *next_line_start = NULL;
532 size_t len = 0;
533 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
534 if (line_end == NULL)
536 /* We haven't found the end-of-line delimiter in the cache.
537 Fill the cache with more data from the file and look for the
538 '\n'. */
539 while (maybe_read_data (c))
541 line_start = c->data + c->line_start_idx;
542 remaining_size = c->nb_read - c->line_start_idx;
543 line_end = (char *) memchr (line_start, '\n', remaining_size);
544 if (line_end != NULL)
546 next_line_start = line_end + 1;
547 break;
550 if (line_end == NULL)
552 /* We've loadded all the file into the cache and still no
553 '\n'. Let's say the line ends up at one byte passed the
554 end of the file. This is to stay consistent with the case
555 of when the line ends up with a '\n' and line_end points to
556 that terminal '\n'. That consistency is useful below in
557 the len calculation. */
558 line_end = c->data + c->nb_read ;
559 c->missing_trailing_newline = true;
561 else
562 c->missing_trailing_newline = false;
564 else
566 next_line_start = line_end + 1;
567 c->missing_trailing_newline = false;
570 if (ferror (c->fp))
571 return false;
573 /* At this point, we've found the end of the of line. It either
574 points to the '\n' or to one byte after the last byte of the
575 file. */
576 gcc_assert (line_end != NULL);
578 len = line_end - line_start;
580 if (c->line_start_idx < c->nb_read)
581 *line = line_start;
583 ++c->line_num;
585 /* Before we update our line record, make sure the hint about the
586 total number of lines of the file is correct. If it's not, then
587 we give up recording line boundaries from now on. */
588 bool update_line_record = true;
589 if (c->line_num > c->total_lines)
590 update_line_record = false;
592 /* Now update our line record so that re-reading lines from the
593 before c->line_start_idx is faster. */
594 if (update_line_record
595 && c->line_record.length () < fcache_line_record_size)
597 /* If the file lines fits in the line record, we just record all
598 its lines ...*/
599 if (c->total_lines <= fcache_line_record_size
600 && c->line_num > c->line_record.length ())
601 c->line_record.safe_push (fcache::line_info (c->line_num,
602 c->line_start_idx,
603 line_end - c->data));
604 else if (c->total_lines > fcache_line_record_size)
606 /* ... otherwise, we just scale total_lines down to
607 (fcache_line_record_size lines. */
608 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
609 if (c->line_record.length () == 0
610 || n >= c->line_record.length ())
611 c->line_record.safe_push (fcache::line_info (c->line_num,
612 c->line_start_idx,
613 line_end - c->data));
617 /* Update c->line_start_idx so that it points to the next line to be
618 read. */
619 if (next_line_start)
620 c->line_start_idx = next_line_start - c->data;
621 else
622 /* We didn't find any terminal '\n'. Let's consider that the end
623 of line is the end of the data in the cache. The next
624 invocation of get_next_line will either read more data from the
625 underlying file or return false early because we've reached the
626 end of the file. */
627 c->line_start_idx = c->nb_read;
629 *line_len = len;
631 return true;
634 /* Consume the next bytes coming from the cache (or from its
635 underlying file if there are remaining unread bytes in the file)
636 until we reach the next end-of-line (or end-of-file). There is no
637 copying from the cache involved. Return TRUE upon successful
638 completion. */
640 static bool
641 goto_next_line (fcache *cache)
643 char *l;
644 ssize_t len;
646 return get_next_line (cache, &l, &len);
649 /* Read an arbitrary line number LINE_NUM from the file cached in C.
650 If the line was read successfully, *LINE points to the beginning
651 of the line in the file cache and *LINE_LEN is the length of the
652 line. *LINE is not nul-terminated, but may contain zero bytes.
653 *LINE is only valid until the next call of read_line_num.
654 This function returns bool if a line was read. */
656 static bool
657 read_line_num (fcache *c, size_t line_num,
658 char **line, ssize_t *line_len)
660 gcc_assert (line_num > 0);
662 if (line_num <= c->line_num)
664 /* We've been asked to read lines that are before c->line_num.
665 So lets use our line record (if it's not empty) to try to
666 avoid re-reading the file from the beginning again. */
668 if (c->line_record.is_empty ())
670 c->line_start_idx = 0;
671 c->line_num = 0;
673 else
675 fcache::line_info *i = NULL;
676 if (c->total_lines <= fcache_line_record_size)
678 /* In languages where the input file is not totally
679 preprocessed up front, the c->total_lines hint
680 can be smaller than the number of lines of the
681 file. In that case, only the first
682 c->total_lines have been recorded.
684 Otherwise, the first c->total_lines we've read have
685 their start/end recorded here. */
686 i = (line_num <= c->total_lines)
687 ? &c->line_record[line_num - 1]
688 : &c->line_record[c->total_lines - 1];
689 gcc_assert (i->line_num <= line_num);
691 else
693 /* So the file had more lines than our line record
694 size. Thus the number of lines we've recorded has
695 been scaled down to fcache_line_reacord_size. Let's
696 pick the start/end of the recorded line that is
697 closest to line_num. */
698 size_t n = (line_num <= c->total_lines)
699 ? line_num * fcache_line_record_size / c->total_lines
700 : c ->line_record.length () - 1;
701 if (n < c->line_record.length ())
703 i = &c->line_record[n];
704 gcc_assert (i->line_num <= line_num);
708 if (i && i->line_num == line_num)
710 /* We have the start/end of the line. */
711 *line = c->data + i->start_pos;
712 *line_len = i->end_pos - i->start_pos;
713 return true;
716 if (i)
718 c->line_start_idx = i->start_pos;
719 c->line_num = i->line_num - 1;
721 else
723 c->line_start_idx = 0;
724 c->line_num = 0;
729 /* Let's walk from line c->line_num up to line_num - 1, without
730 copying any line. */
731 while (c->line_num < line_num - 1)
732 if (!goto_next_line (c))
733 return false;
735 /* The line we want is the next one. Let's read and copy it back to
736 the caller. */
737 return get_next_line (c, line, line_len);
740 /* Return the physical source line that corresponds to FILE_PATH/LINE.
741 The line is not nul-terminated. The returned pointer is only
742 valid until the next call of location_get_source_line.
743 Note that the line can contain several null characters,
744 so the returned value's length has the actual length of the line.
745 If the function fails, a NULL char_span is returned. */
747 char_span
748 location_get_source_line (const char *file_path, int line)
750 char *buffer = NULL;
751 ssize_t len;
753 if (line == 0)
754 return char_span (NULL, 0);
756 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
757 if (c == NULL)
758 return char_span (NULL, 0);
760 bool read = read_line_num (c, line, &buffer, &len);
761 if (!read)
762 return char_span (NULL, 0);
764 return char_span (buffer, len);
767 /* Determine if FILE_PATH missing a trailing newline on its final line.
768 Only valid to call once all of the file has been loaded, by
769 requesting a line number beyond the end of the file. */
771 bool
772 location_missing_trailing_newline (const char *file_path)
774 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
775 if (c == NULL)
776 return false;
778 return c->missing_trailing_newline;
781 /* Test if the location originates from the spelling location of a
782 builtin-tokens. That is, return TRUE if LOC is a (possibly
783 virtual) location of a built-in token that appears in the expansion
784 list of a macro. Please note that this function also works on
785 tokens that result from built-in tokens. For instance, the
786 function would return true if passed a token "4" that is the result
787 of the expansion of the built-in __LINE__ macro. */
788 bool
789 is_location_from_builtin_token (source_location loc)
791 const line_map_ordinary *map = NULL;
792 loc = linemap_resolve_location (line_table, loc,
793 LRK_SPELLING_LOCATION, &map);
794 return loc == BUILTINS_LOCATION;
797 /* Expand the source location LOC into a human readable location. If
798 LOC is virtual, it resolves to the expansion point of the involved
799 macro. If LOC resolves to a builtin location, the file name of the
800 readable location is set to the string "<built-in>". */
802 expanded_location
803 expand_location (source_location loc)
805 return expand_location_1 (loc, /*expansion_point_p=*/true,
806 LOCATION_ASPECT_CARET);
809 /* Expand the source location LOC into a human readable location. If
810 LOC is virtual, it resolves to the expansion location of the
811 relevant macro. If LOC resolves to a builtin location, the file
812 name of the readable location is set to the string
813 "<built-in>". */
815 expanded_location
816 expand_location_to_spelling_point (source_location loc,
817 enum location_aspect aspect)
819 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
822 /* The rich_location class within libcpp requires a way to expand
823 source_location instances, and relies on the client code
824 providing a symbol named
825 linemap_client_expand_location_to_spelling_point
826 to do this.
828 This is the implementation for libcommon.a (all host binaries),
829 which simply calls into expand_location_1. */
831 expanded_location
832 linemap_client_expand_location_to_spelling_point (source_location loc,
833 enum location_aspect aspect)
835 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
839 /* If LOCATION is in a system header and if it is a virtual location for
840 a token coming from the expansion of a macro, unwind it to the
841 location of the expansion point of the macro. Otherwise, just return
842 LOCATION.
844 This is used for instance when we want to emit diagnostics about a
845 token that may be located in a macro that is itself defined in a
846 system header, for example, for the NULL macro. In such a case, if
847 LOCATION were passed directly to diagnostic functions such as
848 warning_at, the diagnostic would be suppressed (unless
849 -Wsystem-headers). */
851 source_location
852 expansion_point_location_if_in_system_header (source_location location)
854 if (in_system_header_at (location))
855 location = linemap_resolve_location (line_table, location,
856 LRK_MACRO_EXPANSION_POINT,
857 NULL);
858 return location;
861 /* If LOCATION is a virtual location for a token coming from the expansion
862 of a macro, unwind to the location of the expansion point of the macro. */
864 source_location
865 expansion_point_location (source_location location)
867 return linemap_resolve_location (line_table, location,
868 LRK_MACRO_EXPANSION_POINT, NULL);
871 /* Construct a location with caret at CARET, ranging from START to
872 finish e.g.
874 11111111112
875 12345678901234567890
877 523 return foo + bar;
878 ~~~~^~~~~
881 The location's caret is at the "+", line 523 column 15, but starts
882 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
883 of "bar" at column 19. */
885 location_t
886 make_location (location_t caret, location_t start, location_t finish)
888 location_t pure_loc = get_pure_location (caret);
889 source_range src_range;
890 src_range.m_start = get_start (start);
891 src_range.m_finish = get_finish (finish);
892 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
893 pure_loc,
894 src_range,
895 NULL);
896 return combined_loc;
899 /* Same as above, but taking a source range rather than two locations. */
901 location_t
902 make_location (location_t caret, source_range src_range)
904 location_t pure_loc = get_pure_location (caret);
905 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
#define ONE_K 1024
#define ONE_M (ONE_K * ONE_K)

/* Display a number as an integer multiple of either:
   - 1024, if said integer is >= to 10 K (in base 2)
   - 1024 * 1024, if said integer is >= 10 M in (base 2)
   Numbers strictly lower than 10 K are displayed unscaled.  Note
   that X is evaluated more than once.  */
#define SCALE(x) ((unsigned long) ((x) < 10 * ONE_K \
                  ? (x) \
                  : ((x) < 10 * ONE_M \
                     ? (x) / ONE_K \
                     : (x) / ONE_M)))

/* For a given integer, display either:
   - the character 'k', if the number is higher than 10 K (in base 2)
     but strictly lower than 10 M (in base 2)
   - the character 'M' if the number is higher than 10 M (in base2)
   - the character ' ' if the number is strictly lower than 10 K  */
#define STAT_LABEL(x) ((x) < 10 * ONE_K ? ' ' : ((x) < 10 * ONE_M ? 'k' : 'M'))

/* Display an integer amount as multiple of 1K or 1M (in base 2).
   Display the correct unit (either k, M, or ' ') after the amount, as
   well.  This expands to two comma-separated arguments.  */
#define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
933 /* Dump statistics to stderr about the memory usage of the line_table
934 set of line maps. This also displays some statistics about macro
935 expansion. */
937 void
938 dump_line_table_statistics (void)
940 struct linemap_stats s;
941 long total_used_map_size,
942 macro_maps_size,
943 total_allocated_map_size;
945 memset (&s, 0, sizeof (s));
947 linemap_get_statistics (line_table, &s);
949 macro_maps_size = s.macro_maps_used_size
950 + s.macro_maps_locations_size;
952 total_allocated_map_size = s.ordinary_maps_allocated_size
953 + s.macro_maps_allocated_size
954 + s.macro_maps_locations_size;
956 total_used_map_size = s.ordinary_maps_used_size
957 + s.macro_maps_used_size
958 + s.macro_maps_locations_size;
960 fprintf (stderr, "Number of expanded macros: %5ld\n",
961 s.num_expanded_macros);
962 if (s.num_expanded_macros != 0)
963 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
964 s.num_macro_tokens / s.num_expanded_macros);
965 fprintf (stderr,
966 "\nLine Table allocations during the "
967 "compilation process\n");
968 fprintf (stderr, "Number of ordinary maps used: %5ld%c\n",
969 SCALE (s.num_ordinary_maps_used),
970 STAT_LABEL (s.num_ordinary_maps_used));
971 fprintf (stderr, "Ordinary map used size: %5ld%c\n",
972 SCALE (s.ordinary_maps_used_size),
973 STAT_LABEL (s.ordinary_maps_used_size));
974 fprintf (stderr, "Number of ordinary maps allocated: %5ld%c\n",
975 SCALE (s.num_ordinary_maps_allocated),
976 STAT_LABEL (s.num_ordinary_maps_allocated));
977 fprintf (stderr, "Ordinary maps allocated size: %5ld%c\n",
978 SCALE (s.ordinary_maps_allocated_size),
979 STAT_LABEL (s.ordinary_maps_allocated_size));
980 fprintf (stderr, "Number of macro maps used: %5ld%c\n",
981 SCALE (s.num_macro_maps_used),
982 STAT_LABEL (s.num_macro_maps_used));
983 fprintf (stderr, "Macro maps used size: %5ld%c\n",
984 SCALE (s.macro_maps_used_size),
985 STAT_LABEL (s.macro_maps_used_size));
986 fprintf (stderr, "Macro maps locations size: %5ld%c\n",
987 SCALE (s.macro_maps_locations_size),
988 STAT_LABEL (s.macro_maps_locations_size));
989 fprintf (stderr, "Macro maps size: %5ld%c\n",
990 SCALE (macro_maps_size),
991 STAT_LABEL (macro_maps_size));
992 fprintf (stderr, "Duplicated maps locations size: %5ld%c\n",
993 SCALE (s.duplicated_macro_maps_locations_size),
994 STAT_LABEL (s.duplicated_macro_maps_locations_size));
995 fprintf (stderr, "Total allocated maps size: %5ld%c\n",
996 SCALE (total_allocated_map_size),
997 STAT_LABEL (total_allocated_map_size));
998 fprintf (stderr, "Total used maps size: %5ld%c\n",
999 SCALE (total_used_map_size),
1000 STAT_LABEL (total_used_map_size));
1001 fprintf (stderr, "Ad-hoc table size: %5ld%c\n",
1002 SCALE (s.adhoc_table_size),
1003 STAT_LABEL (s.adhoc_table_size));
1004 fprintf (stderr, "Ad-hoc table entries used: %5ld\n",
1005 s.adhoc_table_entries_used);
1006 fprintf (stderr, "optimized_ranges: %i\n",
1007 line_table->num_optimized_ranges);
1008 fprintf (stderr, "unoptimized_ranges: %i\n",
1009 line_table->num_unoptimized_ranges);
1011 fprintf (stderr, "\n");
1014 /* Get location one beyond the final location in ordinary map IDX. */
1016 static source_location
1017 get_end_location (struct line_maps *set, unsigned int idx)
1019 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1020 return set->highest_location;
1022 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1023 return MAP_START_LOCATION (next_map);
/* Helper function for write_digit_row.  Write to STREAM the decimal
   character for DIGIT modulo 10.  */

static void
write_digit (FILE *stream, int digit)
{
  fputc ('0' + (digit % 10), stream);
}
1034 /* Helper function for dump_location_info.
1035 Write a row of numbers to STREAM, numbering a source line,
1036 giving the units, tens, hundreds etc of the column number. */
1038 static void
1039 write_digit_row (FILE *stream, int indent,
1040 const line_map_ordinary *map,
1041 source_location loc, int max_col, int divisor)
1043 fprintf (stream, "%*c", indent, ' ');
1044 fprintf (stream, "|");
1045 for (int column = 1; column < max_col; column++)
1047 source_location column_loc = loc + (column << map->m_range_bits);
1048 write_digit (stream, column_loc / divisor);
1050 fprintf (stream, "\n");
1053 /* Write a half-closed (START) / half-open (END) interval of
1054 source_location to STREAM. */
1056 static void
1057 dump_location_range (FILE *stream,
1058 source_location start, source_location end)
1060 fprintf (stream,
1061 " source_location interval: %u <= loc < %u\n",
1062 start, end);
1065 /* Write a labelled description of a half-closed (START) / half-open (END)
1066 interval of source_location to STREAM. */
1068 static void
1069 dump_labelled_location_range (FILE *stream,
1070 const char *name,
1071 source_location start, source_location end)
1073 fprintf (stream, "%s\n", name);
1074 dump_location_range (stream, start, end);
1075 fprintf (stream, "\n");
1078 /* Write a visualization of the locations in the line_table to STREAM. */
1080 void
1081 dump_location_info (FILE *stream)
1083 /* Visualize the reserved locations. */
1084 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1085 0, RESERVED_LOCATION_COUNT);
1087 /* Visualize the ordinary line_map instances, rendering the sources. */
1088 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1090 source_location end_location = get_end_location (line_table, idx);
1091 /* half-closed: doesn't include this one. */
1093 const line_map_ordinary *map
1094 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1095 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1096 dump_location_range (stream,
1097 MAP_START_LOCATION (map), end_location);
1098 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1099 fprintf (stream, " starting at line: %i\n",
1100 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1101 fprintf (stream, " column and range bits: %i\n",
1102 map->m_column_and_range_bits);
1103 fprintf (stream, " column bits: %i\n",
1104 map->m_column_and_range_bits - map->m_range_bits);
1105 fprintf (stream, " range bits: %i\n",
1106 map->m_range_bits);
1108 /* Render the span of source lines that this "map" covers. */
1109 for (source_location loc = MAP_START_LOCATION (map);
1110 loc < end_location;
1111 loc += (1 << map->m_range_bits) )
1113 gcc_assert (pure_location_p (line_table, loc) );
1115 expanded_location exploc
1116 = linemap_expand_location (line_table, map, loc);
1118 if (exploc.column == 0)
1120 /* Beginning of a new source line: draw the line. */
1122 char_span line_text = location_get_source_line (exploc.file,
1123 exploc.line);
1124 if (!line_text)
1125 break;
1126 fprintf (stream,
1127 "%s:%3i|loc:%5i|%.*s\n",
1128 exploc.file, exploc.line,
1129 loc,
1130 (int)line_text.length (), line_text.get_buffer ());
1132 /* "loc" is at column 0, which means "the whole line".
1133 Render the locations *within* the line, by underlining
1134 it, showing the source_location numeric values
1135 at each column. */
1136 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1137 if (max_col > line_text.length ())
1138 max_col = line_text.length () + 1;
1140 int indent = 14 + strlen (exploc.file);
1142 /* Thousands. */
1143 if (end_location > 999)
1144 write_digit_row (stream, indent, map, loc, max_col, 1000);
1146 /* Hundreds. */
1147 if (end_location > 99)
1148 write_digit_row (stream, indent, map, loc, max_col, 100);
1150 /* Tens. */
1151 write_digit_row (stream, indent, map, loc, max_col, 10);
1153 /* Units. */
1154 write_digit_row (stream, indent, map, loc, max_col, 1);
1157 fprintf (stream, "\n");
1160 /* Visualize unallocated values. */
1161 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1162 line_table->highest_location,
1163 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1165 /* Visualize the macro line_map instances, rendering the sources. */
1166 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1168 /* Each macro map that is allocated owns source_location values
1169 that are *lower* that the one before them.
1170 Hence it's meaningful to view them either in order of ascending
1171 source locations, or in order of ascending macro map index. */
1172 const bool ascending_source_locations = true;
1173 unsigned int idx = (ascending_source_locations
1174 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1175 : i);
1176 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1177 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1178 idx,
1179 linemap_map_get_macro_name (map),
1180 MACRO_MAP_NUM_MACRO_TOKENS (map));
1181 dump_location_range (stream,
1182 map->start_location,
1183 (map->start_location
1184 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1185 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1186 "expansion point is location %i",
1187 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1188 fprintf (stream, " map->start_location: %u\n",
1189 map->start_location);
1191 fprintf (stream, " macro_locations:\n");
1192 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1194 source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1195 source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1197 /* linemap_add_macro_token encodes token numbers in an expansion
1198 by putting them after MAP_START_LOCATION. */
1200 /* I'm typically seeing 4 uninitialized entries at the end of
1201 0xafafafaf.
1202 This appears to be due to macro.c:replace_args
1203 adding 2 extra args for padding tokens; presumably there may
1204 be a leading and/or trailing padding token injected,
1205 each for 2 more location slots.
1206 This would explain there being up to 4 source_locations slots
1207 that may be uninitialized. */
1209 fprintf (stream, " %u: %u, %u\n",
1213 if (x == y)
1215 if (x < MAP_START_LOCATION (map))
1216 inform (x, "token %u has x-location == y-location == %u", i, x);
1217 else
1218 fprintf (stream,
1219 "x-location == y-location == %u encodes token # %u\n",
1220 x, x - MAP_START_LOCATION (map));
1222 else
1224 inform (x, "token %u has x-location == %u", i, x);
1225 inform (x, "token %u has y-location == %u", i, y);
1228 fprintf (stream, "\n");
1231 /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1232 macro map, presumably due to an off-by-one error somewhere
1233 between the logic in linemap_enter_macro and
1234 LINEMAPS_MACRO_LOWEST_LOCATION. */
1235 dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1236 MAX_SOURCE_LOCATION,
1237 MAX_SOURCE_LOCATION + 1);
1239 /* Visualize ad-hoc values. */
1240 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1241 MAX_SOURCE_LOCATION + 1, UINT_MAX);
1244 /* string_concat's constructor. */
1246 string_concat::string_concat (int num, location_t *locs)
1247 : m_num (num)
1249 m_locs = ggc_vec_alloc <location_t> (num);
1250 for (int i = 0; i < num; i++)
1251 m_locs[i] = locs[i];
1254 /* string_concat_db's constructor. */
1256 string_concat_db::string_concat_db ()
1258 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1261 /* Record that a string concatenation occurred, covering NUM
1262 string literal tokens. LOCS is an array of size NUM, containing the
1263 locations of the tokens. A copy of LOCS is taken. */
1265 void
1266 string_concat_db::record_string_concatenation (int num, location_t *locs)
1268 gcc_assert (num > 1);
1269 gcc_assert (locs);
1271 location_t key_loc = get_key_loc (locs[0]);
1273 string_concat *concat
1274 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1275 m_table->put (key_loc, concat);
1278 /* Determine if LOC was the location of the the initial token of a
1279 concatenation of string literal tokens.
1280 If so, *OUT_NUM is written to with the number of tokens, and
1281 *OUT_LOCS with the location of an array of locations of the
1282 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1283 storage owned by the string_concat_db.
1284 Otherwise, return false. */
1286 bool
1287 string_concat_db::get_string_concatenation (location_t loc,
1288 int *out_num,
1289 location_t **out_locs)
1291 gcc_assert (out_num);
1292 gcc_assert (out_locs);
1294 location_t key_loc = get_key_loc (loc);
1296 string_concat **concat = m_table->get (key_loc);
1297 if (!concat)
1298 return false;
1300 *out_num = (*concat)->m_num;
1301 *out_locs =(*concat)->m_locs;
1302 return true;
1305 /* Internal function. Canonicalize LOC into a form suitable for
1306 use as a key within the database, stripping away macro expansion,
1307 ad-hoc information, and range information, using the location of
1308 the start of LOC within an ordinary linemap. */
1310 location_t
1311 string_concat_db::get_key_loc (location_t loc)
1313 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1314 NULL);
1316 loc = get_range_from_loc (line_table, loc).m_start;
1318 return loc;
1321 /* Helper class for use within get_substring_ranges_for_loc.
1322 An vec of cpp_string with responsibility for releasing all of the
1323 str->text for each str in the vector. */
1325 class auto_cpp_string_vec : public auto_vec <cpp_string>
1327 public:
1328 auto_cpp_string_vec (int alloc)
1329 : auto_vec <cpp_string> (alloc) {}
1331 ~auto_cpp_string_vec ()
1333 /* Clean up the copies within this vec. */
1334 int i;
1335 cpp_string *str;
1336 FOR_EACH_VEC_ELT (*this, i, str)
1337 free (const_cast <unsigned char *> (str->text));
1341 /* Attempt to populate RANGES with source location information on the
1342 individual characters within the string literal found at STRLOC.
1343 If CONCATS is non-NULL, then any string literals that the token at
1344 STRLOC was concatenated with are also added to RANGES.
1346 Return NULL if successful, or an error message if any errors occurred (in
1347 which case RANGES may be only partially populated and should not
1348 be used).
1350 This is implemented by re-parsing the relevant source line(s). */
1352 static const char *
1353 get_substring_ranges_for_loc (cpp_reader *pfile,
1354 string_concat_db *concats,
1355 location_t strloc,
1356 enum cpp_ttype type,
1357 cpp_substring_ranges &ranges)
1359 gcc_assert (pfile);
1361 if (strloc == UNKNOWN_LOCATION)
1362 return "unknown location";
1364 /* Reparsing the strings requires accurate location information.
1365 If -ftrack-macro-expansion has been overridden from its default
1366 of 2, then we might have a location of a macro expansion point,
1367 rather than the location of the literal itself.
1368 Avoid this by requiring that we have full macro expansion tracking
1369 for substring locations to be available. */
1370 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1371 return "track_macro_expansion != 2";
1373 /* If #line or # 44 "file"-style directives are present, then there's
1374 no guarantee that the line numbers we have can be used to locate
1375 the strings. For example, we might have a .i file with # directives
1376 pointing back to lines within a .c file, but the .c file might
1377 have been edited since the .i file was created.
1378 In such a case, the safest course is to disable on-demand substring
1379 locations. */
1380 if (line_table->seen_line_directive)
1381 return "seen line directive";
1383 /* If string concatenation has occurred at STRLOC, get the locations
1384 of all of the literal tokens making up the compound string.
1385 Otherwise, just use STRLOC. */
1386 int num_locs = 1;
1387 location_t *strlocs = &strloc;
1388 if (concats)
1389 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1391 auto_cpp_string_vec strs (num_locs);
1392 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1393 for (int i = 0; i < num_locs; i++)
1395 /* Get range of strloc. We will use it to locate the start and finish
1396 of the literal token within the line. */
1397 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1399 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1401 /* If the string token was within a macro expansion, then we can
1402 cope with it for the simple case where we have a single token.
1403 Otherwise, bail out. */
1404 if (src_range.m_start != src_range.m_finish)
1405 return "macro expansion";
1407 else
1409 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1410 /* If so, we can't reliably determine where the token started within
1411 its line. */
1412 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1414 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1415 /* If so, we can't reliably determine where the token finished
1416 within its line. */
1417 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1420 expanded_location start
1421 = expand_location_to_spelling_point (src_range.m_start,
1422 LOCATION_ASPECT_START);
1423 expanded_location finish
1424 = expand_location_to_spelling_point (src_range.m_finish,
1425 LOCATION_ASPECT_FINISH);
1426 if (start.file != finish.file)
1427 return "range endpoints are in different files";
1428 if (start.line != finish.line)
1429 return "range endpoints are on different lines";
1430 if (start.column > finish.column)
1431 return "range endpoints are reversed";
1433 char_span line = location_get_source_line (start.file, start.line);
1434 if (!line)
1435 return "unable to read source line";
1437 /* Determine the location of the literal (including quotes
1438 and leading prefix chars, such as the 'u' in a u""
1439 token). */
1440 size_t literal_length = finish.column - start.column + 1;
1442 /* Ensure that we don't crash if we got the wrong location. */
1443 if (line.length () < (start.column - 1 + literal_length))
1444 return "line is not wide enough";
1446 char_span literal = line.subspan (start.column - 1, literal_length);
1448 cpp_string from;
1449 from.len = literal_length;
1450 /* Make a copy of the literal, to avoid having to rely on
1451 the lifetime of the copy of the line within the cache.
1452 This will be released by the auto_cpp_string_vec dtor. */
1453 from.text = (unsigned char *)literal.xstrdup ();
1454 strs.safe_push (from);
1456 /* For very long lines, a new linemap could have started
1457 halfway through the token.
1458 Ensure that the loc_reader uses the linemap of the
1459 *end* of the token for its start location. */
1460 const line_map_ordinary *final_ord_map;
1461 linemap_resolve_location (line_table, src_range.m_finish,
1462 LRK_MACRO_EXPANSION_POINT, &final_ord_map);
1463 location_t start_loc
1464 = linemap_position_for_line_and_column (line_table, final_ord_map,
1465 start.line, start.column);
1467 cpp_string_location_reader loc_reader (start_loc, line_table);
1468 loc_readers.safe_push (loc_reader);
1471 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1472 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1473 loc_readers.address (),
1474 num_locs, &ranges, type);
1475 if (err)
1476 return err;
1478 /* Success: "ranges" should now contain information on the string. */
1479 return NULL;
1482 /* Attempt to populate *OUT_LOC with source location information on the
1483 given characters within the string literal found at STRLOC.
1484 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1485 character set.
1487 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1488 and string literal "012345\n789"
1489 *OUT_LOC is written to with:
1490 "012345\n789"
1491 ~^~~~~
1493 If CONCATS is non-NULL, then any string literals that the token at
1494 STRLOC was concatenated with are also considered.
1496 This is implemented by re-parsing the relevant source line(s).
1498 Return NULL if successful, or an error message if any errors occurred.
1499 Error messages are intended for GCC developers (to help debugging) rather
1500 than for end-users. */
1502 const char *
1503 get_source_location_for_substring (cpp_reader *pfile,
1504 string_concat_db *concats,
1505 location_t strloc,
1506 enum cpp_ttype type,
1507 int caret_idx, int start_idx, int end_idx,
1508 source_location *out_loc)
1510 gcc_checking_assert (caret_idx >= 0);
1511 gcc_checking_assert (start_idx >= 0);
1512 gcc_checking_assert (end_idx >= 0);
1513 gcc_assert (out_loc);
1515 cpp_substring_ranges ranges;
1516 const char *err
1517 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1518 if (err)
1519 return err;
1521 if (caret_idx >= ranges.get_num_ranges ())
1522 return "caret_idx out of range";
1523 if (start_idx >= ranges.get_num_ranges ())
1524 return "start_idx out of range";
1525 if (end_idx >= ranges.get_num_ranges ())
1526 return "end_idx out of range";
1528 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1529 ranges.get_range (start_idx).m_start,
1530 ranges.get_range (end_idx).m_finish);
1531 return NULL;
1534 #if CHECKING_P
1536 namespace selftest {
1538 /* Selftests of location handling. */
1540 /* Attempt to populate *OUT_RANGE with source location information on the
1541 given character within the string literal found at STRLOC.
1542 CHAR_IDX refers to an offset within the execution character set.
1543 If CONCATS is non-NULL, then any string literals that the token at
1544 STRLOC was concatenated with are also considered.
1546 This is implemented by re-parsing the relevant source line(s).
1548 Return NULL if successful, or an error message if any errors occurred.
1549 Error messages are intended for GCC developers (to help debugging) rather
1550 than for end-users. */
1552 static const char *
1553 get_source_range_for_char (cpp_reader *pfile,
1554 string_concat_db *concats,
1555 location_t strloc,
1556 enum cpp_ttype type,
1557 int char_idx,
1558 source_range *out_range)
1560 gcc_checking_assert (char_idx >= 0);
1561 gcc_assert (out_range);
1563 cpp_substring_ranges ranges;
1564 const char *err
1565 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1566 if (err)
1567 return err;
1569 if (char_idx >= ranges.get_num_ranges ())
1570 return "char_idx out of range";
1572 *out_range = ranges.get_range (char_idx);
1573 return NULL;
1576 /* As get_source_range_for_char, but write to *OUT the number
1577 of ranges that are available. */
1579 static const char *
1580 get_num_source_ranges_for_substring (cpp_reader *pfile,
1581 string_concat_db *concats,
1582 location_t strloc,
1583 enum cpp_ttype type,
1584 int *out)
1586 gcc_assert (out);
1588 cpp_substring_ranges ranges;
1589 const char *err
1590 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1592 if (err)
1593 return err;
1595 *out = ranges.get_num_ranges ();
1596 return NULL;
1599 /* Selftests of location handling. */
1601 /* Verify that compare() on linenum_type handles comparisons over the full
1602 range of the type. */
1604 static void
1605 test_linenum_comparisons ()
1607 linenum_type min_line (0);
1608 linenum_type max_line (0xffffffff);
1609 ASSERT_EQ (0, compare (min_line, min_line));
1610 ASSERT_EQ (0, compare (max_line, max_line));
1612 ASSERT_GT (compare (max_line, min_line), 0);
1613 ASSERT_LT (compare (min_line, max_line), 0);
1616 /* Helper function for verifying location data: when location_t
1617 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1618 as having column 0. */
1620 static bool
1621 should_have_column_data_p (location_t loc)
1623 if (IS_ADHOC_LOC (loc))
1624 loc = get_location_from_adhoc_loc (line_table, loc);
1625 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1626 return false;
1627 return true;
1630 /* Selftest for should_have_column_data_p. */
1632 static void
1633 test_should_have_column_data_p ()
1635 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1636 ASSERT_TRUE
1637 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1638 ASSERT_FALSE
1639 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1642 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1643 on LOC. */
1645 static void
1646 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1647 location_t loc)
1649 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1650 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1651 /* If location_t values are sufficiently high, then column numbers
1652 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1653 When close to the threshold, column numbers *may* be present: if
1654 the final linemap before the threshold contains a line that straddles
1655 the threshold, locations in that line have column information. */
1656 if (should_have_column_data_p (loc))
1657 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for source_location/location_t beyond which line-map.c
     changes behavior (disabling of the range-packing optimization,
     disabling of column-tracking).  We can exercise these by starting
     the line_table at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix.  */

struct line_table_case
{
  /* Describe a case using DEFAULT_RANGE_BITS and BASE_LOCATION.  */
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* Value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the value at which to start the line_table's
     highest_location and highest_line.  */
  int m_base_location;
};
1687 /* Constructor. Store the old value of line_table, and create a new
1688 one, using sane defaults. */
1690 line_table_test::line_table_test ()
1692 gcc_assert (saved_line_table == NULL);
1693 saved_line_table = line_table;
1694 line_table = ggc_alloc<line_maps> ();
1695 linemap_init (line_table, BUILTINS_LOCATION);
1696 gcc_assert (saved_line_table->reallocator);
1697 line_table->reallocator = saved_line_table->reallocator;
1698 gcc_assert (saved_line_table->round_alloc_size);
1699 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1700 line_table->default_range_bits = 0;
1703 /* Constructor. Store the old value of line_table, and create a new
1704 one, using the sitation described in CASE_. */
1706 line_table_test::line_table_test (const line_table_case &case_)
1708 gcc_assert (saved_line_table == NULL);
1709 saved_line_table = line_table;
1710 line_table = ggc_alloc<line_maps> ();
1711 linemap_init (line_table, BUILTINS_LOCATION);
1712 gcc_assert (saved_line_table->reallocator);
1713 line_table->reallocator = saved_line_table->reallocator;
1714 gcc_assert (saved_line_table->round_alloc_size);
1715 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1716 line_table->default_range_bits = case_.m_default_range_bits;
1717 if (case_.m_base_location)
1719 line_table->highest_location = case_.m_base_location;
1720 line_table->highest_line = case_.m_base_location;
1724 /* Destructor. Restore the old value of line_table. */
1726 line_table_test::~line_table_test ()
1728 gcc_assert (saved_line_table != NULL);
1729 line_table = saved_line_table;
1730 saved_line_table = NULL;
1733 /* Verify basic operation of ordinary linemaps. */
1735 static void
1736 test_accessing_ordinary_linemaps (const line_table_case &case_)
1738 line_table_test ltt (case_);
1740 /* Build a simple linemap describing some locations. */
1741 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1743 linemap_line_start (line_table, 1, 100);
1744 location_t loc_a = linemap_position_for_column (line_table, 1);
1745 location_t loc_b = linemap_position_for_column (line_table, 23);
1747 linemap_line_start (line_table, 2, 100);
1748 location_t loc_c = linemap_position_for_column (line_table, 1);
1749 location_t loc_d = linemap_position_for_column (line_table, 17);
1751 /* Example of a very long line. */
1752 linemap_line_start (line_table, 3, 2000);
1753 location_t loc_e = linemap_position_for_column (line_table, 700);
1755 /* Transitioning back to a short line. */
1756 linemap_line_start (line_table, 4, 0);
1757 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1759 if (should_have_column_data_p (loc_back_to_short))
1761 /* Verify that we switched to short lines in the linemap. */
1762 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1763 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1766 /* Example of a line that will eventually be seen to be longer
1767 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1768 below that. */
1769 linemap_line_start (line_table, 5, 2000);
1771 location_t loc_start_of_very_long_line
1772 = linemap_position_for_column (line_table, 2000);
1773 location_t loc_too_wide
1774 = linemap_position_for_column (line_table, 4097);
1775 location_t loc_too_wide_2
1776 = linemap_position_for_column (line_table, 4098);
1778 /* ...and back to a sane line length. */
1779 linemap_line_start (line_table, 6, 100);
1780 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1782 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1784 /* Multiple files. */
1785 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1786 linemap_line_start (line_table, 1, 200);
1787 location_t loc_f = linemap_position_for_column (line_table, 150);
1788 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1790 /* Verify that we can recover the location info. */
1791 assert_loceq ("foo.c", 1, 1, loc_a);
1792 assert_loceq ("foo.c", 1, 23, loc_b);
1793 assert_loceq ("foo.c", 2, 1, loc_c);
1794 assert_loceq ("foo.c", 2, 17, loc_d);
1795 assert_loceq ("foo.c", 3, 700, loc_e);
1796 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1798 /* In the very wide line, the initial location should be fully tracked. */
1799 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1800 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1801 be disabled. */
1802 assert_loceq ("foo.c", 5, 0, loc_too_wide);
1803 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1804 /*...and column-tracking should be re-enabled for subsequent lines. */
1805 assert_loceq ("foo.c", 6, 10, loc_sane_again);
1807 assert_loceq ("bar.c", 1, 150, loc_f);
1809 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1810 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1812 /* Verify using make_location to build a range, and extracting data
1813 back from it. */
1814 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1815 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1816 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1817 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1818 ASSERT_EQ (loc_b, src_range.m_start);
1819 ASSERT_EQ (loc_d, src_range.m_finish);
1822 /* Verify various properties of UNKNOWN_LOCATION. */
1824 static void
1825 test_unknown_location ()
1827 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1828 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1829 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1832 /* Verify various properties of BUILTINS_LOCATION. */
1834 static void
1835 test_builtins ()
1837 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1838 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1841 /* Regression test for make_location.
1842 Ensure that we use pure locations for the start/finish of the range,
1843 rather than storing a packed or ad-hoc range as the start/finish. */
1845 static void
1846 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1848 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1849 with C++ frontend.
1850 ....................0000000001111111111222.
1851 ....................1234567890123456789012. */
1852 const char *content = " r += !aaa == bbb;\n";
1853 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1854 line_table_test ltt (case_);
1855 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1857 const location_t c11 = linemap_position_for_column (line_table, 11);
1858 const location_t c12 = linemap_position_for_column (line_table, 12);
1859 const location_t c13 = linemap_position_for_column (line_table, 13);
1860 const location_t c14 = linemap_position_for_column (line_table, 14);
1861 const location_t c21 = linemap_position_for_column (line_table, 21);
1863 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1864 return;
1866 /* Use column 13 for the caret location, arbitrarily, to verify that we
1867 handle start != caret. */
1868 const location_t aaa = make_location (c13, c12, c14);
1869 ASSERT_EQ (c13, get_pure_location (aaa));
1870 ASSERT_EQ (c12, get_start (aaa));
1871 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1872 ASSERT_EQ (c14, get_finish (aaa));
1873 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1875 /* Make a location using a location with a range as the start-point. */
1876 const location_t not_aaa = make_location (c11, aaa, c14);
1877 ASSERT_EQ (c11, get_pure_location (not_aaa));
1878 /* It should use the start location of the range, not store the range
1879 itself. */
1880 ASSERT_EQ (c12, get_start (not_aaa));
1881 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1882 ASSERT_EQ (c14, get_finish (not_aaa));
1883 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1885 /* Similarly, make a location with a range as the end-point. */
1886 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1887 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1888 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1889 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1890 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1891 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1892 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1893 /* It should use the finish location of the range, not store the range
1894 itself. */
1895 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1896 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1897 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1898 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1899 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1902 /* Verify reading of input files (e.g. for caret-based diagnostics). */
1904 static void
1905 test_reading_source_line ()
1907 /* Create a tempfile and write some text to it. */
1908 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1909 "01234567890123456789\n"
1910 "This is the test text\n"
1911 "This is the 3rd line");
1913 /* Read back a specific line from the tempfile. */
1914 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
1915 ASSERT_TRUE (source_line);
1916 ASSERT_TRUE (source_line.get_buffer () != NULL);
1917 ASSERT_EQ (20, source_line.length ());
1918 ASSERT_TRUE (!strncmp ("This is the 3rd line",
1919 source_line.get_buffer (), source_line.length ()));
1921 source_line = location_get_source_line (tmp.get_filename (), 2);
1922 ASSERT_TRUE (source_line);
1923 ASSERT_TRUE (source_line.get_buffer () != NULL);
1924 ASSERT_EQ (21, source_line.length ());
1925 ASSERT_TRUE (!strncmp ("This is the test text",
1926 source_line.get_buffer (), source_line.length ()));
1928 source_line = location_get_source_line (tmp.get_filename (), 4);
1929 ASSERT_FALSE (source_line);
1930 ASSERT_TRUE (source_line.get_buffer () == NULL);
1933 /* Tests of lexing. */
/* Verify that the text obtained from cpp_token_as_text for token TOK
   of PARSER is equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *token_text_ = cpp_token_as_text ((PARSER), (TOK));   \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)token_text_);          \
  SELFTEST_END_STMT
1944 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1945 and ranges from EXP_START_COL to EXP_FINISH_COL.
1946 Use LOC as the effective location of the selftest. */
1948 static void
1949 assert_token_loc_eq (const location &loc,
1950 const cpp_token *tok,
1951 const char *exp_filename, int exp_linenum,
1952 int exp_start_col, int exp_finish_col)
1954 location_t tok_loc = tok->src_loc;
1955 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1956 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1958 /* If location_t values are sufficiently high, then column numbers
1959 will be unavailable. */
1960 if (!should_have_column_data_p (tok_loc))
1961 return;
1963 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1964 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1965 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1966 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1969 /* Use assert_token_loc_eq to verify the TOK->src_loc, using
1970 SELFTEST_LOCATION as the effective location of the selftest. */
1972 #define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
1973 EXP_START_COL, EXP_FINISH_COL) \
1974 assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
1975 (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1977 /* Test of lexing a file using libcpp, verifying tokens and their
1978 location information. */
1980 static void
1981 test_lexer (const line_table_case &case_)
1983 /* Create a tempfile and write some text to it. */
1984 const char *content =
1985 /*00000000011111111112222222222333333.3333444444444.455555555556
1986 12345678901234567890123456789012345.6789012345678.901234567890. */
1987 ("test_name /* c-style comment */\n"
1988 " \"test literal\"\n"
1989 " // test c++-style comment\n"
1990 " 42\n");
1991 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1993 line_table_test ltt (case_);
1995 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1997 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1998 ASSERT_NE (fname, NULL);
2000 /* Verify that we get the expected tokens back, with the correct
2001 location information. */
2003 location_t loc;
2004 const cpp_token *tok;
2005 tok = cpp_get_token_with_location (parser, &loc);
2006 ASSERT_NE (tok, NULL);
2007 ASSERT_EQ (tok->type, CPP_NAME);
2008 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2009 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2011 tok = cpp_get_token_with_location (parser, &loc);
2012 ASSERT_NE (tok, NULL);
2013 ASSERT_EQ (tok->type, CPP_STRING);
2014 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2015 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2017 tok = cpp_get_token_with_location (parser, &loc);
2018 ASSERT_NE (tok, NULL);
2019 ASSERT_EQ (tok->type, CPP_NUMBER);
2020 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2021 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2023 tok = cpp_get_token_with_location (parser, &loc);
2024 ASSERT_NE (tok, NULL);
2025 ASSERT_EQ (tok->type, CPP_EOF);
2027 cpp_finish (parser, NULL);
2028 cpp_destroy (parser);
/* Forward decls.  */

struct lexer_test;
class lexer_test_options;

/* Abstract base for objects that customize a lexer_test.
   The lexer_test constructor invokes the "apply" vfunc on the options
   object it is given (if any).  */

class lexer_test_options
{
 public:
  /* Adjust TEST; implemented by each concrete options class.  */
  virtual void apply (lexer_test &test) = 0;
};
2045 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2046 in its dtor.
2048 This is needed by struct lexer_test to ensure that the cleanup of the
2049 cpp_reader happens *after* the cleanup of the temp_source_file. */
2051 class cpp_reader_ptr
2053 public:
2054 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2056 ~cpp_reader_ptr ()
2058 cpp_finish (m_ptr, NULL);
2059 cpp_destroy (m_ptr);
2062 operator cpp_reader * () const { return m_ptr; }
2064 private:
2065 cpp_reader *m_ptr;
2068 /* A struct for writing lexer tests. */
2070 struct lexer_test
2072 lexer_test (const line_table_case &case_, const char *content,
2073 lexer_test_options *options);
2074 ~lexer_test ();
2076 const cpp_token *get_token ();
2078 /* The ordering of these fields matters.
2079 The line_table_test must be first, since the cpp_reader_ptr
2080 uses it.
2081 The cpp_reader must be cleaned up *after* the temp_source_file
2082 since the filenames in input.c's input cache are owned by the
2083 cpp_reader; in particular, when ~temp_source_file evicts the
2084 filename the filenames must still be alive. */
2085 line_table_test m_ltt;
2086 cpp_reader_ptr m_parser;
2087 temp_source_file m_tempfile;
2088 string_concat_db m_concats;
2089 bool m_implicitly_expect_EOF;
2092 /* Use an EBCDIC encoding for the execution charset, specifically
2093 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2095 This exercises iconv integration within libcpp.
2096 Not every build of iconv supports the given charset,
2097 so we need to flag this error and handle it gracefully. */
2099 class ebcdic_execution_charset : public lexer_test_options
2101 public:
2102 ebcdic_execution_charset () : m_num_iconv_errors (0)
2104 gcc_assert (s_singleton == NULL);
2105 s_singleton = this;
2107 ~ebcdic_execution_charset ()
2109 gcc_assert (s_singleton == this);
2110 s_singleton = NULL;
2113 void apply (lexer_test &test) FINAL OVERRIDE
2115 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2116 cpp_opts->narrow_charset = "IBM1047";
2118 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2119 callbacks->diagnostic = on_diagnostic;
2122 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2123 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2124 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2125 rich_location *richloc ATTRIBUTE_UNUSED,
2126 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2127 ATTRIBUTE_FPTR_PRINTF(5,0)
2129 gcc_assert (s_singleton);
2130 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2131 const char *msg = "conversion from %s to %s not supported by iconv";
2132 #ifdef ENABLE_NLS
2133 msg = dgettext ("cpplib", msg);
2134 #endif
2135 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2136 when the local iconv build doesn't support the conversion. */
2137 if (strcmp (msgid, msg) == 0)
2139 s_singleton->m_num_iconv_errors++;
2140 return true;
2143 /* Otherwise, we have an unexpected error. */
2144 abort ();
2147 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2149 private:
2150 static ebcdic_execution_charset *s_singleton;
2151 int m_num_iconv_errors;
2154 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2156 /* A lexer_test_options subclass that records a list of diagnostic
2157 messages emitted by the lexer. */
2159 class lexer_diagnostic_sink : public lexer_test_options
2161 public:
2162 lexer_diagnostic_sink ()
2164 gcc_assert (s_singleton == NULL);
2165 s_singleton = this;
2167 ~lexer_diagnostic_sink ()
2169 gcc_assert (s_singleton == this);
2170 s_singleton = NULL;
2172 int i;
2173 char *str;
2174 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2175 free (str);
2178 void apply (lexer_test &test) FINAL OVERRIDE
2180 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2181 callbacks->diagnostic = on_diagnostic;
2184 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2185 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2186 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2187 rich_location *richloc ATTRIBUTE_UNUSED,
2188 const char *msgid, va_list *ap)
2189 ATTRIBUTE_FPTR_PRINTF(5,0)
2191 char *msg = xvasprintf (msgid, *ap);
2192 s_singleton->m_diagnostics.safe_push (msg);
2193 return true;
2196 auto_vec<char *> m_diagnostics;
2198 private:
2199 static lexer_diagnostic_sink *s_singleton;
2202 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2204 /* Constructor. Override line_table with a new instance based on CASE_,
2205 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2206 start parsing the tempfile. */
2208 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2209 lexer_test_options *options)
2210 : m_ltt (case_),
2211 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2212 /* Create a tempfile and write the text to it. */
2213 m_tempfile (SELFTEST_LOCATION, ".c", content),
2214 m_concats (),
2215 m_implicitly_expect_EOF (true)
2217 if (options)
2218 options->apply (*this);
2220 cpp_init_iconv (m_parser);
2222 /* Parse the file. */
2223 const char *fname = cpp_read_main_file (m_parser,
2224 m_tempfile.get_filename ());
2225 ASSERT_NE (fname, NULL);
2228 /* Destructor. By default, verify that the next token in m_parser is EOF. */
2230 lexer_test::~lexer_test ()
2232 location_t loc;
2233 const cpp_token *tok;
2235 if (m_implicitly_expect_EOF)
2237 tok = cpp_get_token_with_location (m_parser, &loc);
2238 ASSERT_NE (tok, NULL);
2239 ASSERT_EQ (tok->type, CPP_EOF);
2243 /* Get the next token from m_parser. */
2245 const cpp_token *
2246 lexer_test::get_token ()
2248 location_t loc;
2249 const cpp_token *tok;
2251 tok = cpp_get_token_with_location (m_parser, &loc);
2252 ASSERT_NE (tok, NULL);
2253 return tok;
2256 /* Verify that locations within string literals are correctly handled. */
2258 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2259 using the string concatenation database for TEST.
2261 Assert that the character at index IDX is on EXPECTED_LINE,
2262 and that it begins at column EXPECTED_START_COL and ends at
2263 EXPECTED_FINISH_COL (unless the locations are beyond
2264 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2265 columns). */
2267 static void
2268 assert_char_at_range (const location &loc,
2269 lexer_test& test,
2270 location_t strloc, enum cpp_ttype type, int idx,
2271 int expected_line, int expected_start_col,
2272 int expected_finish_col)
2274 cpp_reader *pfile = test.m_parser;
2275 string_concat_db *concats = &test.m_concats;
2277 source_range actual_range = source_range();
2278 const char *err
2279 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2280 &actual_range);
2281 if (should_have_column_data_p (strloc))
2282 ASSERT_EQ_AT (loc, NULL, err);
2283 else
2285 ASSERT_STREQ_AT (loc,
2286 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2287 err);
2288 return;
2291 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2292 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2293 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2294 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2296 if (should_have_column_data_p (actual_range.m_start))
2298 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2299 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2301 if (should_have_column_data_p (actual_range.m_finish))
2303 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2304 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2308 /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2309 the effective location of any errors. */
2311 #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2312 EXPECTED_START_COL, EXPECTED_FINISH_COL) \
2313 assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2314 (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2315 (EXPECTED_FINISH_COL))
2317 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2318 using the string concatenation database for TEST.
2320 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2322 static void
2323 assert_num_substring_ranges (const location &loc,
2324 lexer_test& test,
2325 location_t strloc,
2326 enum cpp_ttype type,
2327 int expected_num_ranges)
2329 cpp_reader *pfile = test.m_parser;
2330 string_concat_db *concats = &test.m_concats;
2332 int actual_num_ranges = -1;
2333 const char *err
2334 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2335 &actual_num_ranges);
2336 if (should_have_column_data_p (strloc))
2337 ASSERT_EQ_AT (loc, NULL, err);
2338 else
2340 ASSERT_STREQ_AT (loc,
2341 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2342 err);
2343 return;
2345 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2348 /* Macro for calling assert_num_substring_ranges, supplying
2349 SELFTEST_LOCATION for the effective location of any errors. */
2351 #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2352 EXPECTED_NUM_RANGES) \
2353 assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2354 (TYPE), (EXPECTED_NUM_RANGES))
2357 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2358 returns an error (using the string concatenation database for TEST). */
2360 static void
2361 assert_has_no_substring_ranges (const location &loc,
2362 lexer_test& test,
2363 location_t strloc,
2364 enum cpp_ttype type,
2365 const char *expected_err)
2367 cpp_reader *pfile = test.m_parser;
2368 string_concat_db *concats = &test.m_concats;
2369 cpp_substring_ranges ranges;
2370 const char *actual_err
2371 = get_substring_ranges_for_loc (pfile, concats, strloc,
2372 type, ranges);
2373 if (should_have_column_data_p (strloc))
2374 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2375 else
2376 ASSERT_STREQ_AT (loc,
2377 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2378 actual_err);
2381 #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
2382 assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2383 (STRLOC), (TYPE), (ERR))
2385 /* Lex a simple string literal. Verify the substring location data, before
2386 and after running cpp_interpret_string on it. */
2388 static void
2389 test_lexer_string_locations_simple (const line_table_case &case_)
2391 /* Digits 0-9 (with 0 at column 10), the simple way.
2392 ....................000000000.11111111112.2222222223333333333
2393 ....................123456789.01234567890.1234567890123456789
2394 We add a trailing comment to ensure that we correctly locate
2395 the end of the string literal token. */
2396 const char *content = " \"0123456789\" /* not a string */\n";
2397 lexer_test test (case_, content, NULL);
2399 /* Verify that we get the expected token back, with the correct
2400 location information. */
2401 const cpp_token *tok = test.get_token ();
2402 ASSERT_EQ (tok->type, CPP_STRING);
2403 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2404 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2406 /* At this point in lexing, the quote characters are treated as part of
2407 the string (they are stripped off by cpp_interpret_string). */
2409 ASSERT_EQ (tok->val.str.len, 12);
2411 /* Verify that cpp_interpret_string works. */
2412 cpp_string dst_string;
2413 const enum cpp_ttype type = CPP_STRING;
2414 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2415 &dst_string, type);
2416 ASSERT_TRUE (result);
2417 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2418 free (const_cast <unsigned char *> (dst_string.text));
2420 /* Verify ranges of individual characters. This no longer includes the
2421 opening quote, but does include the closing quote. */
2422 for (int i = 0; i <= 10; i++)
2423 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2424 10 + i, 10 + i);
2426 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2429 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2430 encoding. */
2432 static void
2433 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2435 /* EBCDIC support requires iconv. */
2436 if (!HAVE_ICONV)
2437 return;
2439 /* Digits 0-9 (with 0 at column 10), the simple way.
2440 ....................000000000.11111111112.2222222223333333333
2441 ....................123456789.01234567890.1234567890123456789
2442 We add a trailing comment to ensure that we correctly locate
2443 the end of the string literal token. */
2444 const char *content = " \"0123456789\" /* not a string */\n";
2445 ebcdic_execution_charset use_ebcdic;
2446 lexer_test test (case_, content, &use_ebcdic);
2448 /* Verify that we get the expected token back, with the correct
2449 location information. */
2450 const cpp_token *tok = test.get_token ();
2451 ASSERT_EQ (tok->type, CPP_STRING);
2452 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2453 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2455 /* At this point in lexing, the quote characters are treated as part of
2456 the string (they are stripped off by cpp_interpret_string). */
2458 ASSERT_EQ (tok->val.str.len, 12);
2460 /* The remainder of the test requires an iconv implementation that
2461 can convert from UTF-8 to the EBCDIC encoding requested above. */
2462 if (use_ebcdic.iconv_errors_occurred_p ())
2463 return;
2465 /* Verify that cpp_interpret_string works. */
2466 cpp_string dst_string;
2467 const enum cpp_ttype type = CPP_STRING;
2468 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2469 &dst_string, type);
2470 ASSERT_TRUE (result);
2471 /* We should now have EBCDIC-encoded text, specifically
2472 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2473 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2474 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2475 (const char *)dst_string.text);
2476 free (const_cast <unsigned char *> (dst_string.text));
2478 /* Verify that we don't attempt to record substring location information
2479 for such cases. */
2480 ASSERT_HAS_NO_SUBSTRING_RANGES
2481 (test, tok->src_loc, type,
2482 "execution character set != source character set");
2485 /* Lex a string literal containing a hex-escaped character.
2486 Verify the substring location data, before and after running
2487 cpp_interpret_string on it. */
2489 static void
2490 test_lexer_string_locations_hex (const line_table_case &case_)
2492 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2493 and with a space in place of digit 6, to terminate the escaped
2494 hex code.
2495 ....................000000000.111111.11112222.
2496 ....................123456789.012345.67890123. */
2497 const char *content = " \"01234\\x35 789\"\n";
2498 lexer_test test (case_, content, NULL);
2500 /* Verify that we get the expected token back, with the correct
2501 location information. */
2502 const cpp_token *tok = test.get_token ();
2503 ASSERT_EQ (tok->type, CPP_STRING);
2504 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2505 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2507 /* At this point in lexing, the quote characters are treated as part of
2508 the string (they are stripped off by cpp_interpret_string). */
2509 ASSERT_EQ (tok->val.str.len, 15);
2511 /* Verify that cpp_interpret_string works. */
2512 cpp_string dst_string;
2513 const enum cpp_ttype type = CPP_STRING;
2514 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2515 &dst_string, type);
2516 ASSERT_TRUE (result);
2517 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2518 free (const_cast <unsigned char *> (dst_string.text));
2520 /* Verify ranges of individual characters. This no longer includes the
2521 opening quote, but does include the closing quote. */
2522 for (int i = 0; i <= 4; i++)
2523 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2524 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2525 for (int i = 6; i <= 10; i++)
2526 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2528 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2531 /* Lex a string literal containing an octal-escaped character.
2532 Verify the substring location data after running cpp_interpret_string
2533 on it. */
2535 static void
2536 test_lexer_string_locations_oct (const line_table_case &case_)
2538 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2539 and with a space in place of digit 6, to terminate the escaped
2540 octal code.
2541 ....................000000000.111111.11112222.2222223333333333444
2542 ....................123456789.012345.67890123.4567890123456789012 */
2543 const char *content = " \"01234\\065 789\" /* not a string */\n";
2544 lexer_test test (case_, content, NULL);
2546 /* Verify that we get the expected token back, with the correct
2547 location information. */
2548 const cpp_token *tok = test.get_token ();
2549 ASSERT_EQ (tok->type, CPP_STRING);
2550 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2552 /* Verify that cpp_interpret_string works. */
2553 cpp_string dst_string;
2554 const enum cpp_ttype type = CPP_STRING;
2555 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2556 &dst_string, type);
2557 ASSERT_TRUE (result);
2558 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2559 free (const_cast <unsigned char *> (dst_string.text));
2561 /* Verify ranges of individual characters. This no longer includes the
2562 opening quote, but does include the closing quote. */
2563 for (int i = 0; i < 5; i++)
2564 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2565 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2566 for (int i = 6; i <= 10; i++)
2567 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2569 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2572 /* Test of string literal containing letter escapes. */
2574 static void
2575 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2577 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2578 .....................000000000.1.11111.1.1.11222.22222223333333
2579 .....................123456789.0.12345.6.7.89012.34567890123456. */
2580 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2581 lexer_test test (case_, content, NULL);
2583 /* Verify that we get the expected tokens back. */
2584 const cpp_token *tok = test.get_token ();
2585 ASSERT_EQ (tok->type, CPP_STRING);
2586 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2588 /* Verify ranges of individual characters. */
2589 /* "\t". */
2590 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2591 0, 1, 10, 11);
2592 /* "foo". */
2593 for (int i = 1; i <= 3; i++)
2594 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2595 i, 1, 11 + i, 11 + i);
2596 /* "\\" and "\n". */
2597 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2598 4, 1, 15, 16);
2599 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2600 5, 1, 17, 18);
2602 /* "bar" and closing quote for nul-terminator. */
2603 for (int i = 6; i <= 9; i++)
2604 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2605 i, 1, 13 + i, 13 + i);
2607 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2610 /* Another test of a string literal containing a letter escape.
2611 Based on string seen in
2612 printf ("%-%\n");
2613 in gcc.dg/format/c90-printf-1.c. */
2615 static void
2616 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2618 /* .....................000000000.1111.11.1111.22222222223.
2619 .....................123456789.0123.45.6789.01234567890. */
2620 const char *content = (" \"%-%\\n\" /* non-str */\n");
2621 lexer_test test (case_, content, NULL);
2623 /* Verify that we get the expected tokens back. */
2624 const cpp_token *tok = test.get_token ();
2625 ASSERT_EQ (tok->type, CPP_STRING);
2626 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2628 /* Verify ranges of individual characters. */
2629 /* "%-%". */
2630 for (int i = 0; i < 3; i++)
2631 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2632 i, 1, 10 + i, 10 + i);
2633 /* "\n". */
2634 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2635 3, 1, 13, 14);
2637 /* Closing quote for nul-terminator. */
2638 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2639 4, 1, 15, 15);
2641 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2644 /* Lex a string literal containing UCN 4 characters.
2645 Verify the substring location data after running cpp_interpret_string
2646 on it. */
2648 static void
2649 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2651 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2652 as UCN 4.
2653 ....................000000000.111111.111122.222222223.33333333344444
2654 ....................123456789.012345.678901.234567890.12345678901234 */
2655 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2656 lexer_test test (case_, content, NULL);
2658 /* Verify that we get the expected token back, with the correct
2659 location information. */
2660 const cpp_token *tok = test.get_token ();
2661 ASSERT_EQ (tok->type, CPP_STRING);
2662 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2664 /* Verify that cpp_interpret_string works.
2665 The string should be encoded in the execution character
2666 set. Assuming that that is UTF-8, we should have the following:
2667 ----------- ---- ----- ------- ----------------
2668 Byte offset Byte Octal Unicode Source Column(s)
2669 ----------- ---- ----- ------- ----------------
2670 0 0x30 '0' 10
2671 1 0x31 '1' 11
2672 2 0x32 '2' 12
2673 3 0x33 '3' 13
2674 4 0x34 '4' 14
2675 5 0xE2 \342 U+2174 15-20
2676 6 0x85 \205 (cont) 15-20
2677 7 0xB4 \264 (cont) 15-20
2678 8 0xE2 \342 U+2175 21-26
2679 9 0x85 \205 (cont) 21-26
2680 10 0xB5 \265 (cont) 21-26
2681 11 0x37 '7' 27
2682 12 0x38 '8' 28
2683 13 0x39 '9' 29
2684 14 0x00 30 (closing quote)
2685 ----------- ---- ----- ------- ---------------. */
2687 cpp_string dst_string;
2688 const enum cpp_ttype type = CPP_STRING;
2689 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2690 &dst_string, type);
2691 ASSERT_TRUE (result);
2692 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2693 (const char *)dst_string.text);
2694 free (const_cast <unsigned char *> (dst_string.text));
2696 /* Verify ranges of individual characters. This no longer includes the
2697 opening quote, but does include the closing quote.
2698 '01234'. */
2699 for (int i = 0; i <= 4; i++)
2700 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2701 /* U+2174. */
2702 for (int i = 5; i <= 7; i++)
2703 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2704 /* U+2175. */
2705 for (int i = 8; i <= 10; i++)
2706 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2707 /* '789' and nul terminator */
2708 for (int i = 11; i <= 14; i++)
2709 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2711 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2714 /* Lex a string literal containing UCN 8 characters.
2715 Verify the substring location data after running cpp_interpret_string
2716 on it. */
2718 static void
2719 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2721 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2722 ....................000000000.111111.1111222222.2222333333333.344444
2723 ....................123456789.012345.6789012345.6789012345678.901234 */
2724 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2725 lexer_test test (case_, content, NULL);
2727 /* Verify that we get the expected token back, with the correct
2728 location information. */
2729 const cpp_token *tok = test.get_token ();
2730 ASSERT_EQ (tok->type, CPP_STRING);
2731 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2732 "\"01234\\U00002174\\U00002175789\"");
2734 /* Verify that cpp_interpret_string works.
2735 The UTF-8 encoding of the string is identical to that from
2736 the ucn4 testcase above; the only difference is the column
2737 locations. */
2738 cpp_string dst_string;
2739 const enum cpp_ttype type = CPP_STRING;
2740 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2741 &dst_string, type);
2742 ASSERT_TRUE (result);
2743 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2744 (const char *)dst_string.text);
2745 free (const_cast <unsigned char *> (dst_string.text));
2747 /* Verify ranges of individual characters. This no longer includes the
2748 opening quote, but does include the closing quote.
2749 '01234'. */
2750 for (int i = 0; i <= 4; i++)
2751 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2752 /* U+2174. */
2753 for (int i = 5; i <= 7; i++)
2754 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2755 /* U+2175. */
2756 for (int i = 8; i <= 10; i++)
2757 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2758 /* '789' at columns 35-37 */
2759 for (int i = 11; i <= 13; i++)
2760 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2761 /* Closing quote/nul-terminator at column 38. */
2762 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2764 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2767 /* Fetch a big-endian 32-bit value and convert to host endianness. */
2769 static uint32_t
2770 uint32_from_big_endian (const uint32_t *ptr_be_value)
2772 const unsigned char *buf = (const unsigned char *)ptr_be_value;
2773 return (((uint32_t) buf[0] << 24)
2774 | ((uint32_t) buf[1] << 16)
2775 | ((uint32_t) buf[2] << 8)
2776 | (uint32_t) buf[3]);
2779 /* Lex a wide string literal and verify that attempts to read substring
2780 location data from it fail gracefully. */
2782 static void
2783 test_lexer_string_locations_wide_string (const line_table_case &case_)
2785 /* Digits 0-9.
2786 ....................000000000.11111111112.22222222233333
2787 ....................123456789.01234567890.12345678901234 */
2788 const char *content = " L\"0123456789\" /* non-str */\n";
2789 lexer_test test (case_, content, NULL);
2791 /* Verify that we get the expected token back, with the correct
2792 location information. */
2793 const cpp_token *tok = test.get_token ();
2794 ASSERT_EQ (tok->type, CPP_WSTRING);
2795 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2797 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2798 cpp_string dst_string;
2799 const enum cpp_ttype type = CPP_WSTRING;
2800 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2801 &dst_string, type);
2802 ASSERT_TRUE (result);
2803 /* The cpp_reader defaults to big-endian with
2804 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2805 now be encoded as UTF-32BE. */
2806 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2807 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2808 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2809 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2810 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2811 free (const_cast <unsigned char *> (dst_string.text));
2813 /* We don't yet support generating substring location information
2814 for L"" strings. */
2815 ASSERT_HAS_NO_SUBSTRING_RANGES
2816 (test, tok->src_loc, type,
2817 "execution character set != source character set");
2820 /* Fetch a big-endian 16-bit value and convert to host endianness. */
2822 static uint16_t
2823 uint16_from_big_endian (const uint16_t *ptr_be_value)
2825 const unsigned char *buf = (const unsigned char *)ptr_be_value;
2826 return ((uint16_t) buf[0] << 8) | (uint16_t) buf[1];
2829 /* Lex a u"" string literal and verify that attempts to read substring
2830 location data from it fail gracefully. */
2832 static void
2833 test_lexer_string_locations_string16 (const line_table_case &case_)
2835 /* Digits 0-9.
2836 ....................000000000.11111111112.22222222233333
2837 ....................123456789.01234567890.12345678901234 */
2838 const char *content = " u\"0123456789\" /* non-str */\n";
2839 lexer_test test (case_, content, NULL);
2841 /* Verify that we get the expected token back, with the correct
2842 location information. */
2843 const cpp_token *tok = test.get_token ();
2844 ASSERT_EQ (tok->type, CPP_STRING16);
2845 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2847 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2848 cpp_string dst_string;
2849 const enum cpp_ttype type = CPP_STRING16;
2850 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2851 &dst_string, type);
2852 ASSERT_TRUE (result);
2854 /* The cpp_reader defaults to big-endian, so dst_string should
2855 now be encoded as UTF-16BE. */
2856 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2857 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2858 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2859 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2860 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2861 free (const_cast <unsigned char *> (dst_string.text));
2863 /* We don't yet support generating substring location information
2864 for L"" strings. */
2865 ASSERT_HAS_NO_SUBSTRING_RANGES
2866 (test, tok->src_loc, type,
2867 "execution character set != source character set");
2870 /* Lex a U"" string literal and verify that attempts to read substring
2871 location data from it fail gracefully. */
2873 static void
2874 test_lexer_string_locations_string32 (const line_table_case &case_)
2876 /* Digits 0-9.
2877 ....................000000000.11111111112.22222222233333
2878 ....................123456789.01234567890.12345678901234 */
2879 const char *content = " U\"0123456789\" /* non-str */\n";
2880 lexer_test test (case_, content, NULL);
2882 /* Verify that we get the expected token back, with the correct
2883 location information. */
2884 const cpp_token *tok = test.get_token ();
2885 ASSERT_EQ (tok->type, CPP_STRING32);
2886 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2888 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2889 cpp_string dst_string;
2890 const enum cpp_ttype type = CPP_STRING32;
2891 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2892 &dst_string, type);
2893 ASSERT_TRUE (result);
2895 /* The cpp_reader defaults to big-endian, so dst_string should
2896 now be encoded as UTF-32BE. */
2897 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2898 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2899 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2900 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2901 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2902 free (const_cast <unsigned char *> (dst_string.text));
2904 /* We don't yet support generating substring location information
2905 for L"" strings. */
2906 ASSERT_HAS_NO_SUBSTRING_RANGES
2907 (test, tok->src_loc, type,
2908 "execution character set != source character set");
2911 /* Lex a u8-string literal.
2912 Verify the substring location data after running cpp_interpret_string
2913 on it. */
2915 static void
2916 test_lexer_string_locations_u8 (const line_table_case &case_)
2918 /* Digits 0-9.
2919 ....................000000000.11111111112.22222222233333
2920 ....................123456789.01234567890.12345678901234 */
2921 const char *content = " u8\"0123456789\" /* non-str */\n";
2922 lexer_test test (case_, content, NULL);
2924 /* Verify that we get the expected token back, with the correct
2925 location information. */
2926 const cpp_token *tok = test.get_token ();
2927 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2928 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2930 /* Verify that cpp_interpret_string works. */
2931 cpp_string dst_string;
2932 const enum cpp_ttype type = CPP_STRING;
2933 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2934 &dst_string, type);
2935 ASSERT_TRUE (result);
2936 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2937 free (const_cast <unsigned char *> (dst_string.text));
2939 /* Verify ranges of individual characters. This no longer includes the
2940 opening quote, but does include the closing quote. */
2941 for (int i = 0; i <= 10; i++)
2942 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2945 /* Lex a string literal containing UTF-8 source characters.
2946 Verify the substring location data after running cpp_interpret_string
2947 on it. */
2949 static void
2950 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2952 /* This string literal is written out to the source file as UTF-8,
2953 and is of the form "before mojibake after", where "mojibake"
2954 is written as the following four unicode code points:
2955 U+6587 CJK UNIFIED IDEOGRAPH-6587
2956 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2957 U+5316 CJK UNIFIED IDEOGRAPH-5316
2958 U+3051 HIRAGANA LETTER KE.
2959 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2960 "before" and "after" are 1 byte per unicode character.
2962 The numbering shown are "columns", which are *byte* numbers within
2963 the line, rather than unicode character numbers.
2965 .................... 000000000.1111111.
2966 .................... 123456789.0123456. */
2967 const char *content = (" \"before "
2968 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2969 UTF-8: 0xE6 0x96 0x87
2970 C octal escaped UTF-8: \346\226\207
2971 "column" numbers: 17-19. */
2972 "\346\226\207"
2974 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2975 UTF-8: 0xE5 0xAD 0x97
2976 C octal escaped UTF-8: \345\255\227
2977 "column" numbers: 20-22. */
2978 "\345\255\227"
2980 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2981 UTF-8: 0xE5 0x8C 0x96
2982 C octal escaped UTF-8: \345\214\226
2983 "column" numbers: 23-25. */
2984 "\345\214\226"
2986 /* U+3051 HIRAGANA LETTER KE
2987 UTF-8: 0xE3 0x81 0x91
2988 C octal escaped UTF-8: \343\201\221
2989 "column" numbers: 26-28. */
2990 "\343\201\221"
2992 /* column numbers 29 onwards
2993 2333333.33334444444444
2994 9012345.67890123456789. */
2995 " after\" /* non-str */\n");
2996 lexer_test test (case_, content, NULL);
2998 /* Verify that we get the expected token back, with the correct
2999 location information. */
3000 const cpp_token *tok = test.get_token ();
3001 ASSERT_EQ (tok->type, CPP_STRING);
3002 ASSERT_TOKEN_AS_TEXT_EQ
3003 (test.m_parser, tok,
3004 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3006 /* Verify that cpp_interpret_string works. */
3007 cpp_string dst_string;
3008 const enum cpp_ttype type = CPP_STRING;
3009 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3010 &dst_string, type);
3011 ASSERT_TRUE (result);
3012 ASSERT_STREQ
3013 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3014 (const char *)dst_string.text);
3015 free (const_cast <unsigned char *> (dst_string.text));
3017 /* Verify ranges of individual characters. This no longer includes the
3018 opening quote, but does include the closing quote.
3019 Assuming that both source and execution encodings are UTF-8, we have
3020 a run of 25 octets in each, plus the NUL terminator. */
3021 for (int i = 0; i < 25; i++)
3022 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3023 /* NUL-terminator should use the closing quote at column 35. */
3024 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3026 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3029 /* Test of string literal concatenation. */
3031 static void
3032 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3034 /* Digits 0-9.
3035 .....................000000000.111111.11112222222222
3036 .....................123456789.012345.67890123456789. */
3037 const char *content = (" \"01234\" /* non-str */\n"
3038 " \"56789\" /* non-str */\n");
3039 lexer_test test (case_, content, NULL);
3041 location_t input_locs[2];
3043 /* Verify that we get the expected tokens back. */
3044 auto_vec <cpp_string> input_strings;
3045 const cpp_token *tok_a = test.get_token ();
3046 ASSERT_EQ (tok_a->type, CPP_STRING);
3047 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3048 input_strings.safe_push (tok_a->val.str);
3049 input_locs[0] = tok_a->src_loc;
3051 const cpp_token *tok_b = test.get_token ();
3052 ASSERT_EQ (tok_b->type, CPP_STRING);
3053 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3054 input_strings.safe_push (tok_b->val.str);
3055 input_locs[1] = tok_b->src_loc;
3057 /* Verify that cpp_interpret_string works. */
3058 cpp_string dst_string;
3059 const enum cpp_ttype type = CPP_STRING;
3060 bool result = cpp_interpret_string (test.m_parser,
3061 input_strings.address (), 2,
3062 &dst_string, type);
3063 ASSERT_TRUE (result);
3064 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3065 free (const_cast <unsigned char *> (dst_string.text));
3067 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3068 test.m_concats.record_string_concatenation (2, input_locs);
3070 location_t initial_loc = input_locs[0];
3072 /* "01234" on line 1. */
3073 for (int i = 0; i <= 4; i++)
3074 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3075 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3076 for (int i = 5; i <= 10; i++)
3077 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3079 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3082 /* Another test of string literal concatenation. */
3084 static void
3085 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3087 /* Digits 0-9.
3088 .....................000000000.111.11111112222222
3089 .....................123456789.012.34567890123456. */
3090 const char *content = (" \"01\" /* non-str */\n"
3091 " \"23\" /* non-str */\n"
3092 " \"45\" /* non-str */\n"
3093 " \"67\" /* non-str */\n"
3094 " \"89\" /* non-str */\n");
3095 lexer_test test (case_, content, NULL);
3097 auto_vec <cpp_string> input_strings;
3098 location_t input_locs[5];
3100 /* Verify that we get the expected tokens back. */
3101 for (int i = 0; i < 5; i++)
3103 const cpp_token *tok = test.get_token ();
3104 ASSERT_EQ (tok->type, CPP_STRING);
3105 input_strings.safe_push (tok->val.str);
3106 input_locs[i] = tok->src_loc;
3109 /* Verify that cpp_interpret_string works. */
3110 cpp_string dst_string;
3111 const enum cpp_ttype type = CPP_STRING;
3112 bool result = cpp_interpret_string (test.m_parser,
3113 input_strings.address (), 5,
3114 &dst_string, type);
3115 ASSERT_TRUE (result);
3116 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3117 free (const_cast <unsigned char *> (dst_string.text));
3119 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3120 test.m_concats.record_string_concatenation (5, input_locs);
3122 location_t initial_loc = input_locs[0];
3124 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3125 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3126 and expect get_source_range_for_substring to fail.
3127 However, for a string concatenation test, we can have a case
3128 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3129 but subsequent strings can be after it.
3130 Attempting to detect this within assert_char_at_range
3131 would overcomplicate the logic for the common test cases, so
3132 we detect it here. */
3133 if (should_have_column_data_p (input_locs[0])
3134 && !should_have_column_data_p (input_locs[4]))
3136 /* Verify that get_source_range_for_substring gracefully rejects
3137 this case. */
3138 source_range actual_range;
3139 const char *err
3140 = get_source_range_for_char (test.m_parser, &test.m_concats,
3141 initial_loc, type, 0, &actual_range);
3142 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3143 return;
3146 for (int i = 0; i < 5; i++)
3147 for (int j = 0; j < 2; j++)
3148 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3149 i + 1, 10 + j, 10 + j);
3151 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3152 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3154 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3157 /* Another test of string literal concatenation, this time combined with
3158 various kinds of escaped characters. */
3160 static void
3161 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3163 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3164 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3165 const char *content
3166 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3167 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3168 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3169 lexer_test test (case_, content, NULL);
3171 auto_vec <cpp_string> input_strings;
3172 location_t input_locs[4];
3174 /* Verify that we get the expected tokens back. */
3175 for (int i = 0; i < 4; i++)
3177 const cpp_token *tok = test.get_token ();
3178 ASSERT_EQ (tok->type, CPP_STRING);
3179 input_strings.safe_push (tok->val.str);
3180 input_locs[i] = tok->src_loc;
3183 /* Verify that cpp_interpret_string works. */
3184 cpp_string dst_string;
3185 const enum cpp_ttype type = CPP_STRING;
3186 bool result = cpp_interpret_string (test.m_parser,
3187 input_strings.address (), 4,
3188 &dst_string, type);
3189 ASSERT_TRUE (result);
3190 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3191 free (const_cast <unsigned char *> (dst_string.text));
3193 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3194 test.m_concats.record_string_concatenation (4, input_locs);
3196 location_t initial_loc = input_locs[0];
3198 for (int i = 0; i <= 4; i++)
3199 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3200 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3201 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3202 for (int i = 7; i <= 9; i++)
3203 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3205 /* NUL-terminator should use the location of the final closing quote. */
3206 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3208 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3211 /* Test of string literal in a macro. */
3213 static void
3214 test_lexer_string_locations_macro (const line_table_case &case_)
3216 /* Digits 0-9.
3217 .....................0000000001111111111.22222222223.
3218 .....................1234567890123456789.01234567890. */
3219 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3220 " MACRO");
3221 lexer_test test (case_, content, NULL);
3223 /* Verify that we get the expected tokens back. */
3224 const cpp_token *tok = test.get_token ();
3225 ASSERT_EQ (tok->type, CPP_PADDING);
3227 tok = test.get_token ();
3228 ASSERT_EQ (tok->type, CPP_STRING);
3229 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3231 /* Verify ranges of individual characters. We ought to
3232 see columns within the macro definition. */
3233 for (int i = 0; i <= 10; i++)
3234 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3235 i, 1, 20 + i, 20 + i);
3237 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3239 tok = test.get_token ();
3240 ASSERT_EQ (tok->type, CPP_PADDING);
3243 /* Test of stringification of a macro argument. */
3245 static void
3246 test_lexer_string_locations_stringified_macro_argument
3247 (const line_table_case &case_)
3249 /* .....................000000000111111111122222222223.
3250 .....................123456789012345678901234567890. */
3251 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3252 "MACRO(foo)\n");
3253 lexer_test test (case_, content, NULL);
3255 /* Verify that we get the expected token back. */
3256 const cpp_token *tok = test.get_token ();
3257 ASSERT_EQ (tok->type, CPP_PADDING);
3259 tok = test.get_token ();
3260 ASSERT_EQ (tok->type, CPP_STRING);
3261 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3263 /* We don't support getting the location of a stringified macro
3264 argument. Verify that it fails gracefully. */
3265 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3266 "cpp_interpret_string_1 failed");
3268 tok = test.get_token ();
3269 ASSERT_EQ (tok->type, CPP_PADDING);
3271 tok = test.get_token ();
3272 ASSERT_EQ (tok->type, CPP_PADDING);
3275 /* Ensure that we are fail gracefully if something attempts to pass
3276 in a location that isn't a string literal token. Seen on this code:
3278 const char a[] = " %d ";
3279 __builtin_printf (a, 0.5);
3282 when c-format.c erroneously used the indicated one-character
3283 location as the format string location, leading to a read past the
3284 end of a string buffer in cpp_interpret_string_1. */
3286 static void
3287 test_lexer_string_locations_non_string (const line_table_case &case_)
3289 /* .....................000000000111111111122222222223.
3290 .....................123456789012345678901234567890. */
3291 const char *content = (" a\n");
3292 lexer_test test (case_, content, NULL);
3294 /* Verify that we get the expected token back. */
3295 const cpp_token *tok = test.get_token ();
3296 ASSERT_EQ (tok->type, CPP_NAME);
3297 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3299 /* At this point, libcpp is attempting to interpret the name as a
3300 string literal, despite it not starting with a quote. We don't detect
3301 that, but we should at least fail gracefully. */
3302 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3303 "cpp_interpret_string_1 failed");
3306 /* Ensure that we can read substring information for a token which
3307 starts in one linemap and ends in another . Adapted from
3308 gcc.dg/cpp/pr69985.c. */
3310 static void
3311 test_lexer_string_locations_long_line (const line_table_case &case_)
3313 /* .....................000000.000111111111
3314 .....................123456.789012346789. */
3315 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3316 " \"0123456789012345678901234567890123456789"
3317 "0123456789012345678901234567890123456789"
3318 "0123456789012345678901234567890123456789"
3319 "0123456789\"\n");
3321 lexer_test test (case_, content, NULL);
3323 /* Verify that we get the expected token back. */
3324 const cpp_token *tok = test.get_token ();
3325 ASSERT_EQ (tok->type, CPP_STRING);
3327 if (!should_have_column_data_p (line_table->highest_location))
3328 return;
3330 /* Verify ranges of individual characters. */
3331 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3332 for (int i = 0; i < 131; i++)
3333 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3334 i, 2, 7 + i, 7 + i);
3337 /* Test of locations within a raw string that doesn't contain a newline. */
3339 static void
3340 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3342 /* .....................00.0000000111111111122.
3343 .....................12.3456789012345678901. */
3344 const char *content = ("R\"foo(0123456789)foo\"\n");
3345 lexer_test test (case_, content, NULL);
3347 /* Verify that we get the expected token back. */
3348 const cpp_token *tok = test.get_token ();
3349 ASSERT_EQ (tok->type, CPP_STRING);
3351 /* Verify that cpp_interpret_string works. */
3352 cpp_string dst_string;
3353 const enum cpp_ttype type = CPP_STRING;
3354 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3355 &dst_string, type);
3356 ASSERT_TRUE (result);
3357 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3358 free (const_cast <unsigned char *> (dst_string.text));
3360 if (!should_have_column_data_p (line_table->highest_location))
3361 return;
3363 /* 0-9, plus the nil terminator. */
3364 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3365 for (int i = 0; i < 11; i++)
3366 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3367 i, 1, 7 + i, 7 + i);
3370 /* Test of locations within a raw string that contains a newline. */
3372 static void
3373 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3375 /* .....................00.0000.
3376 .....................12.3456. */
3377 const char *content = ("R\"foo(\n"
3378 /* .....................00000.
3379 .....................12345. */
3380 "hello\n"
3381 "world\n"
3382 /* .....................00000.
3383 .....................12345. */
3384 ")foo\"\n");
3385 lexer_test test (case_, content, NULL);
3387 /* Verify that we get the expected token back. */
3388 const cpp_token *tok = test.get_token ();
3389 ASSERT_EQ (tok->type, CPP_STRING);
3391 /* Verify that cpp_interpret_string works. */
3392 cpp_string dst_string;
3393 const enum cpp_ttype type = CPP_STRING;
3394 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3395 &dst_string, type);
3396 ASSERT_TRUE (result);
3397 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3398 free (const_cast <unsigned char *> (dst_string.text));
3400 if (!should_have_column_data_p (line_table->highest_location))
3401 return;
3403 /* Currently we don't support locations within raw strings that
3404 contain newlines. */
3405 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3406 "range endpoints are on different lines");
3409 /* Test of parsing an unterminated raw string. */
3411 static void
3412 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3414 const char *content = "R\"ouch()ouCh\" /* etc */";
3416 lexer_diagnostic_sink diagnostics;
3417 lexer_test test (case_, content, &diagnostics);
3418 test.m_implicitly_expect_EOF = false;
3420 /* Attempt to parse the raw string. */
3421 const cpp_token *tok = test.get_token ();
3422 ASSERT_EQ (tok->type, CPP_EOF);
3424 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3425 /* We expect the message "unterminated raw string"
3426 in the "cpplib" translation domain.
3427 It's not clear that dgettext is available on all supported hosts,
3428 so this assertion is commented-out for now.
3429 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3430 diagnostics.m_diagnostics[0]);
3434 /* Test of lexing char constants. */
3436 static void
3437 test_lexer_char_constants (const line_table_case &case_)
3439 /* Various char constants.
3440 .....................0000000001111111111.22222222223.
3441 .....................1234567890123456789.01234567890. */
3442 const char *content = (" 'a'\n"
3443 " u'a'\n"
3444 " U'a'\n"
3445 " L'a'\n"
3446 " 'abc'\n");
3447 lexer_test test (case_, content, NULL);
3449 /* Verify that we get the expected tokens back. */
3450 /* 'a'. */
3451 const cpp_token *tok = test.get_token ();
3452 ASSERT_EQ (tok->type, CPP_CHAR);
3453 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3455 unsigned int chars_seen;
3456 int unsignedp;
3457 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3458 &chars_seen, &unsignedp);
3459 ASSERT_EQ (cc, 'a');
3460 ASSERT_EQ (chars_seen, 1);
3462 /* u'a'. */
3463 tok = test.get_token ();
3464 ASSERT_EQ (tok->type, CPP_CHAR16);
3465 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3467 /* U'a'. */
3468 tok = test.get_token ();
3469 ASSERT_EQ (tok->type, CPP_CHAR32);
3470 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3472 /* L'a'. */
3473 tok = test.get_token ();
3474 ASSERT_EQ (tok->type, CPP_WCHAR);
3475 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3477 /* 'abc' (c-char-sequence). */
3478 tok = test.get_token ();
3479 ASSERT_EQ (tok->type, CPP_CHAR);
3480 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3482 /* A table of interesting location_t values, giving one axis of our test
3483 matrix. */
3485 static const location_t boundary_locations[] = {
3486 /* Zero means "don't override the default values for a new line_table". */
3489 /* An arbitrary non-zero value that isn't close to one of
3490 the boundary values below. */
3491 0x10000,
3493 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3494 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3495 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3496 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3497 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3498 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3500 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3501 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3502 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3503 LINE_MAP_MAX_LOCATION_WITH_COLS,
3504 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3505 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3508 /* Run TESTCASE multiple times, once for each case in our test matrix. */
3510 void
3511 for_each_line_table_case (void (*testcase) (const line_table_case &))
3513 /* As noted above in the description of struct line_table_case,
3514 we want to explore a test matrix of interesting line_table
3515 situations, running various selftests for each case within the
3516 matrix. */
3518 /* Run all tests with:
3519 (a) line_table->default_range_bits == 0, and
3520 (b) line_table->default_range_bits == 5. */
3521 int num_cases_tested = 0;
3522 for (int default_range_bits = 0; default_range_bits <= 5;
3523 default_range_bits += 5)
3525 /* ...and use each of the "interesting" location values as
3526 the starting location within line_table. */
3527 const int num_boundary_locations
3528 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3529 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3531 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3533 testcase (c);
3535 num_cases_tested++;
3539 /* Verify that we fully covered the test matrix. */
3540 ASSERT_EQ (num_cases_tested, 2 * 12);
3543 /* Run all of the selftests within this file. */
3545 void
3546 input_c_tests ()
3548 test_linenum_comparisons ();
3549 test_should_have_column_data_p ();
3550 test_unknown_location ();
3551 test_builtins ();
3552 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3554 for_each_line_table_case (test_accessing_ordinary_linemaps);
3555 for_each_line_table_case (test_lexer);
3556 for_each_line_table_case (test_lexer_string_locations_simple);
3557 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3558 for_each_line_table_case (test_lexer_string_locations_hex);
3559 for_each_line_table_case (test_lexer_string_locations_oct);
3560 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3561 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3562 for_each_line_table_case (test_lexer_string_locations_ucn4);
3563 for_each_line_table_case (test_lexer_string_locations_ucn8);
3564 for_each_line_table_case (test_lexer_string_locations_wide_string);
3565 for_each_line_table_case (test_lexer_string_locations_string16);
3566 for_each_line_table_case (test_lexer_string_locations_string32);
3567 for_each_line_table_case (test_lexer_string_locations_u8);
3568 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3569 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3570 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3571 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3572 for_each_line_table_case (test_lexer_string_locations_macro);
3573 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3574 for_each_line_table_case (test_lexer_string_locations_non_string);
3575 for_each_line_table_case (test_lexer_string_locations_long_line);
3576 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3577 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3578 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3579 for_each_line_table_case (test_lexer_char_constants);
3581 test_reading_source_line ();
3584 } // namespace selftest
3586 #endif /* CHECKING_P */