PR target/81369
[official-gcc.git] / gcc / input.c
blob0480eb24ec01b51c8353258db8c3cf59e6b66931
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2017 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic-core.h"
25 #include "selftest.h"
26 #include "cpplib.h"
28 #ifndef HAVE_ICONV
29 #define HAVE_ICONV 0
30 #endif
32 /* This is a cache used by get_next_line to store the content of a
33 file to be searched for file lines. */
34 struct fcache
36 /* These are information used to store a line boundary. */
37 struct line_info
39 /* The line number. It starts from 1. */
40 size_t line_num;
42 /* The position (byte count) of the beginning of the line,
43 relative to the file data pointer. This starts at zero. */
44 size_t start_pos;
46 /* The position (byte count) of the last byte of the line. This
47 normally points to the '\n' character, or to one byte after the
48 last byte of the file, if the file doesn't contain a '\n'
49 character. */
50 size_t end_pos;
52 line_info (size_t l, size_t s, size_t e)
53 : line_num (l), start_pos (s), end_pos (e)
56 line_info ()
57 :line_num (0), start_pos (0), end_pos (0)
61 /* The number of time this file has been accessed. This is used
62 to designate which file cache to evict from the cache
63 array. */
64 unsigned use_count;
66 /* The file_path is the key for identifying a particular file in
67 the cache.
68 For libcpp-using code, the underlying buffer for this field is
69 owned by the corresponding _cpp_file within the cpp_reader. */
70 const char *file_path;
72 FILE *fp;
74 /* This points to the content of the file that we've read so
75 far. */
76 char *data;
78 /* The size of the DATA array above.*/
79 size_t size;
81 /* The number of bytes read from the underlying file so far. This
82 must be less (or equal) than SIZE above. */
83 size_t nb_read;
85 /* The index of the beginning of the current line. */
86 size_t line_start_idx;
88 /* The number of the previous line read. This starts at 1. Zero
89 means we've read no line so far. */
90 size_t line_num;
92 /* This is the total number of lines of the current file. At the
93 moment, we try to get this information from the line map
94 subsystem. Note that this is just a hint. When using the C++
95 front-end, this hint is correct because the input file is then
96 completely tokenized before parsing starts; so the line map knows
97 the number of lines before compilation really starts. For e.g,
98 the C front-end, it can happen that we start emitting diagnostics
99 before the line map has seen the end of the file. */
100 size_t total_lines;
102 /* Could this file be missing a trailing newline on its final line?
103 Initially true (to cope with empty files), set to true/false
104 as each line is read. */
105 bool missing_trailing_newline;
107 /* This is a record of the beginning and end of the lines we've seen
108 while reading the file. This is useful to avoid walking the data
109 from the beginning when we are asked to read a line that is
110 before LINE_START_IDX above. Note that the maximum size of this
111 record is fcache_line_record_size, so that the memory consumption
112 doesn't explode. We thus scale total_lines down to
113 fcache_line_record_size. */
114 vec<line_info, va_heap> line_record;
116 fcache ();
117 ~fcache ();
120 /* Current position in real source file. */
122 location_t input_location = UNKNOWN_LOCATION;
124 struct line_maps *line_table;
126 /* A stashed copy of "line_table" for use by selftest::line_table_test.
127 This needs to be a global so that it can be a GC root, and thus
128 prevent the stashed copy from being garbage-collected if the GC runs
129 during a line_table_test. */
131 struct line_maps *saved_line_table;
133 static fcache *fcache_tab;
134 static const size_t fcache_tab_size = 16;
135 static const size_t fcache_buffer_size = 4 * 1024;
136 static const size_t fcache_line_record_size = 100;
138 /* Expand the source location LOC into a human readable location. If
139 LOC resolves to a builtin location, the file name of the readable
140 location is set to the string "<built-in>". If EXPANSION_POINT_P is
141 TRUE and LOC is virtual, then it is resolved to the expansion
142 point of the involved macro. Otherwise, it is resolved to the
143 spelling location of the token.
145 When resolving to the spelling location of the token, if the
146 resulting location is for a built-in location (that is, it has no
147 associated line/column) in the context of a macro expansion, the
148 returned location is the first one (while unwinding the macro
149 location towards its expansion point) that is in real source
150 code.
152 ASPECT controls which part of the location to use. */
154 static expanded_location
155 expand_location_1 (source_location loc,
156 bool expansion_point_p,
157 enum location_aspect aspect)
159 expanded_location xloc;
160 const line_map_ordinary *map;
161 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
162 tree block = NULL;
164 if (IS_ADHOC_LOC (loc))
166 block = LOCATION_BLOCK (loc);
167 loc = LOCATION_LOCUS (loc);
170 memset (&xloc, 0, sizeof (xloc));
172 if (loc >= RESERVED_LOCATION_COUNT)
174 if (!expansion_point_p)
176 /* We want to resolve LOC to its spelling location.
178 But if that spelling location is a reserved location that
179 appears in the context of a macro expansion (like for a
180 location for a built-in token), let's consider the first
181 location (toward the expansion point) that is not reserved;
182 that is, the first location that is in real source code. */
183 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
184 loc, NULL);
185 lrk = LRK_SPELLING_LOCATION;
187 loc = linemap_resolve_location (line_table, loc, lrk, &map);
189 /* loc is now either in an ordinary map, or is a reserved location.
190 If it is a compound location, the caret is in a spelling location,
191 but the start/finish might still be a virtual location.
192 Depending of what the caller asked for, we may need to recurse
193 one level in order to resolve any virtual locations in the
194 end-points. */
195 switch (aspect)
197 default:
198 gcc_unreachable ();
199 /* Fall through. */
200 case LOCATION_ASPECT_CARET:
201 break;
202 case LOCATION_ASPECT_START:
204 source_location start = get_start (loc);
205 if (start != loc)
206 return expand_location_1 (start, expansion_point_p, aspect);
208 break;
209 case LOCATION_ASPECT_FINISH:
211 source_location finish = get_finish (loc);
212 if (finish != loc)
213 return expand_location_1 (finish, expansion_point_p, aspect);
215 break;
217 xloc = linemap_expand_location (line_table, map, loc);
220 xloc.data = block;
221 if (loc <= BUILTINS_LOCATION)
222 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
224 return xloc;
227 /* Initialize the set of cache used for files accessed by caret
228 diagnostic. */
230 static void
231 diagnostic_file_cache_init (void)
233 if (fcache_tab == NULL)
234 fcache_tab = new fcache[fcache_tab_size];
237 /* Free the resources used by the set of cache used for files accessed
238 by caret diagnostic. */
240 void
241 diagnostic_file_cache_fini (void)
243 if (fcache_tab)
245 delete [] (fcache_tab);
246 fcache_tab = NULL;
250 /* Return the total lines number that have been read so far by the
251 line map (in the preprocessor) so far. For languages like C++ that
252 entirely preprocess the input file before starting to parse, this
253 equals the actual number of lines of the file. */
255 static size_t
256 total_lines_num (const char *file_path)
258 size_t r = 0;
259 source_location l = 0;
260 if (linemap_get_file_highest_location (line_table, file_path, &l))
262 gcc_assert (l >= RESERVED_LOCATION_COUNT);
263 expanded_location xloc = expand_location (l);
264 r = xloc.line;
266 return r;
269 /* Lookup the cache used for the content of a given file accessed by
270 caret diagnostic. Return the found cached file, or NULL if no
271 cached file was found. */
273 static fcache*
274 lookup_file_in_cache_tab (const char *file_path)
276 if (file_path == NULL)
277 return NULL;
279 diagnostic_file_cache_init ();
281 /* This will contain the found cached file. */
282 fcache *r = NULL;
283 for (unsigned i = 0; i < fcache_tab_size; ++i)
285 fcache *c = &fcache_tab[i];
286 if (c->file_path && !strcmp (c->file_path, file_path))
288 ++c->use_count;
289 r = c;
293 if (r)
294 ++r->use_count;
296 return r;
299 /* Purge any mention of FILENAME from the cache of files used for
300 printing source code. For use in selftests when working
301 with tempfiles. */
303 void
304 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
306 gcc_assert (file_path);
308 fcache *r = lookup_file_in_cache_tab (file_path);
309 if (!r)
310 /* Not found. */
311 return;
313 r->file_path = NULL;
314 if (r->fp)
315 fclose (r->fp);
316 r->fp = NULL;
317 r->nb_read = 0;
318 r->line_start_idx = 0;
319 r->line_num = 0;
320 r->line_record.truncate (0);
321 r->use_count = 0;
322 r->total_lines = 0;
323 r->missing_trailing_newline = true;
326 /* Return the file cache that has been less used, recently, or the
327 first empty one. If HIGHEST_USE_COUNT is non-null,
328 *HIGHEST_USE_COUNT is set to the highest use count of the entries
329 in the cache table. */
331 static fcache*
332 evicted_cache_tab_entry (unsigned *highest_use_count)
334 diagnostic_file_cache_init ();
336 fcache *to_evict = &fcache_tab[0];
337 unsigned huc = to_evict->use_count;
338 for (unsigned i = 1; i < fcache_tab_size; ++i)
340 fcache *c = &fcache_tab[i];
341 bool c_is_empty = (c->file_path == NULL);
343 if (c->use_count < to_evict->use_count
344 || (to_evict->file_path && c_is_empty))
345 /* We evict C because it's either an entry with a lower use
346 count or one that is empty. */
347 to_evict = c;
349 if (huc < c->use_count)
350 huc = c->use_count;
352 if (c_is_empty)
353 /* We've reached the end of the cache; subsequent elements are
354 all empty. */
355 break;
358 if (highest_use_count)
359 *highest_use_count = huc;
361 return to_evict;
364 /* Create the cache used for the content of a given file to be
365 accessed by caret diagnostic. This cache is added to an array of
366 cache and can be retrieved by lookup_file_in_cache_tab. This
367 function returns the created cache. Note that only the last
368 fcache_tab_size files are cached. */
370 static fcache*
371 add_file_to_cache_tab (const char *file_path)
374 FILE *fp = fopen (file_path, "r");
375 if (fp == NULL)
376 return NULL;
378 unsigned highest_use_count = 0;
379 fcache *r = evicted_cache_tab_entry (&highest_use_count);
380 r->file_path = file_path;
381 if (r->fp)
382 fclose (r->fp);
383 r->fp = fp;
384 r->nb_read = 0;
385 r->line_start_idx = 0;
386 r->line_num = 0;
387 r->line_record.truncate (0);
388 /* Ensure that this cache entry doesn't get evicted next time
389 add_file_to_cache_tab is called. */
390 r->use_count = ++highest_use_count;
391 r->total_lines = total_lines_num (file_path);
392 r->missing_trailing_newline = true;
394 return r;
397 /* Lookup the cache used for the content of a given file accessed by
398 caret diagnostic. If no cached file was found, create a new cache
399 for this file, add it to the array of cached file and return
400 it. */
402 static fcache*
403 lookup_or_add_file_to_cache_tab (const char *file_path)
405 fcache *r = lookup_file_in_cache_tab (file_path);
406 if (r == NULL)
407 r = add_file_to_cache_tab (file_path);
408 return r;
411 /* Default constructor for a cache of file used by caret
412 diagnostic. */
414 fcache::fcache ()
415 : use_count (0), file_path (NULL), fp (NULL), data (0),
416 size (0), nb_read (0), line_start_idx (0), line_num (0),
417 total_lines (0), missing_trailing_newline (true)
419 line_record.create (0);
422 /* Destructor for a cache of file used by caret diagnostic. */
424 fcache::~fcache ()
426 if (fp)
428 fclose (fp);
429 fp = NULL;
431 if (data)
433 XDELETEVEC (data);
434 data = 0;
436 line_record.release ();
439 /* Returns TRUE iff the cache would need to be filled with data coming
440 from the file. That is, either the cache is empty or full or the
441 current line is empty. Note that if the cache is full, it would
442 need to be extended and filled again. */
444 static bool
445 needs_read (fcache *c)
447 return (c->nb_read == 0
448 || c->nb_read == c->size
449 || (c->line_start_idx >= c->nb_read - 1));
452 /* Return TRUE iff the cache is full and thus needs to be
453 extended. */
455 static bool
456 needs_grow (fcache *c)
458 return c->nb_read == c->size;
461 /* Grow the cache if it needs to be extended. */
463 static void
464 maybe_grow (fcache *c)
466 if (!needs_grow (c))
467 return;
469 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
470 c->data = XRESIZEVEC (char, c->data, size);
471 c->size = size;
474 /* Read more data into the cache. Extends the cache if need be.
475 Returns TRUE iff new data could be read. */
477 static bool
478 read_data (fcache *c)
480 if (feof (c->fp) || ferror (c->fp))
481 return false;
483 maybe_grow (c);
485 char * from = c->data + c->nb_read;
486 size_t to_read = c->size - c->nb_read;
487 size_t nb_read = fread (from, 1, to_read, c->fp);
489 if (ferror (c->fp))
490 return false;
492 c->nb_read += nb_read;
493 return !!nb_read;
496 /* Read new data iff the cache needs to be filled with more data
497 coming from the file FP. Return TRUE iff the cache was filled with
498 mode data. */
500 static bool
501 maybe_read_data (fcache *c)
503 if (!needs_read (c))
504 return false;
505 return read_data (c);
508 /* Read a new line from file FP, using C as a cache for the data
509 coming from the file. Upon successful completion, *LINE is set to
510 the beginning of the line found. *LINE points directly in the
511 line cache and is only valid until the next call of get_next_line.
512 *LINE_LEN is set to the length of the line. Note that the line
513 does not contain any terminal delimiter. This function returns
514 true if some data was read or process from the cache, false
515 otherwise. Note that subsequent calls to get_next_line might
516 make the content of *LINE invalid. */
518 static bool
519 get_next_line (fcache *c, char **line, ssize_t *line_len)
521 /* Fill the cache with data to process. */
522 maybe_read_data (c);
524 size_t remaining_size = c->nb_read - c->line_start_idx;
525 if (remaining_size == 0)
526 /* There is no more data to process. */
527 return false;
529 char *line_start = c->data + c->line_start_idx;
531 char *next_line_start = NULL;
532 size_t len = 0;
533 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
534 if (line_end == NULL)
536 /* We haven't found the end-of-line delimiter in the cache.
537 Fill the cache with more data from the file and look for the
538 '\n'. */
539 while (maybe_read_data (c))
541 line_start = c->data + c->line_start_idx;
542 remaining_size = c->nb_read - c->line_start_idx;
543 line_end = (char *) memchr (line_start, '\n', remaining_size);
544 if (line_end != NULL)
546 next_line_start = line_end + 1;
547 break;
550 if (line_end == NULL)
552 /* We've loadded all the file into the cache and still no
553 '\n'. Let's say the line ends up at one byte passed the
554 end of the file. This is to stay consistent with the case
555 of when the line ends up with a '\n' and line_end points to
556 that terminal '\n'. That consistency is useful below in
557 the len calculation. */
558 line_end = c->data + c->nb_read ;
559 c->missing_trailing_newline = true;
561 else
562 c->missing_trailing_newline = false;
564 else
566 next_line_start = line_end + 1;
567 c->missing_trailing_newline = false;
570 if (ferror (c->fp))
571 return false;
573 /* At this point, we've found the end of the of line. It either
574 points to the '\n' or to one byte after the last byte of the
575 file. */
576 gcc_assert (line_end != NULL);
578 len = line_end - line_start;
580 if (c->line_start_idx < c->nb_read)
581 *line = line_start;
583 ++c->line_num;
585 /* Before we update our line record, make sure the hint about the
586 total number of lines of the file is correct. If it's not, then
587 we give up recording line boundaries from now on. */
588 bool update_line_record = true;
589 if (c->line_num > c->total_lines)
590 update_line_record = false;
592 /* Now update our line record so that re-reading lines from the
593 before c->line_start_idx is faster. */
594 if (update_line_record
595 && c->line_record.length () < fcache_line_record_size)
597 /* If the file lines fits in the line record, we just record all
598 its lines ...*/
599 if (c->total_lines <= fcache_line_record_size
600 && c->line_num > c->line_record.length ())
601 c->line_record.safe_push (fcache::line_info (c->line_num,
602 c->line_start_idx,
603 line_end - c->data));
604 else if (c->total_lines > fcache_line_record_size)
606 /* ... otherwise, we just scale total_lines down to
607 (fcache_line_record_size lines. */
608 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
609 if (c->line_record.length () == 0
610 || n >= c->line_record.length ())
611 c->line_record.safe_push (fcache::line_info (c->line_num,
612 c->line_start_idx,
613 line_end - c->data));
617 /* Update c->line_start_idx so that it points to the next line to be
618 read. */
619 if (next_line_start)
620 c->line_start_idx = next_line_start - c->data;
621 else
622 /* We didn't find any terminal '\n'. Let's consider that the end
623 of line is the end of the data in the cache. The next
624 invocation of get_next_line will either read more data from the
625 underlying file or return false early because we've reached the
626 end of the file. */
627 c->line_start_idx = c->nb_read;
629 *line_len = len;
631 return true;
634 /* Consume the next bytes coming from the cache (or from its
635 underlying file if there are remaining unread bytes in the file)
636 until we reach the next end-of-line (or end-of-file). There is no
637 copying from the cache involved. Return TRUE upon successful
638 completion. */
640 static bool
641 goto_next_line (fcache *cache)
643 char *l;
644 ssize_t len;
646 return get_next_line (cache, &l, &len);
649 /* Read an arbitrary line number LINE_NUM from the file cached in C.
650 If the line was read successfully, *LINE points to the beginning
651 of the line in the file cache and *LINE_LEN is the length of the
652 line. *LINE is not nul-terminated, but may contain zero bytes.
653 *LINE is only valid until the next call of read_line_num.
654 This function returns bool if a line was read. */
656 static bool
657 read_line_num (fcache *c, size_t line_num,
658 char **line, ssize_t *line_len)
660 gcc_assert (line_num > 0);
662 if (line_num <= c->line_num)
664 /* We've been asked to read lines that are before c->line_num.
665 So lets use our line record (if it's not empty) to try to
666 avoid re-reading the file from the beginning again. */
668 if (c->line_record.is_empty ())
670 c->line_start_idx = 0;
671 c->line_num = 0;
673 else
675 fcache::line_info *i = NULL;
676 if (c->total_lines <= fcache_line_record_size)
678 /* In languages where the input file is not totally
679 preprocessed up front, the c->total_lines hint
680 can be smaller than the number of lines of the
681 file. In that case, only the first
682 c->total_lines have been recorded.
684 Otherwise, the first c->total_lines we've read have
685 their start/end recorded here. */
686 i = (line_num <= c->total_lines)
687 ? &c->line_record[line_num - 1]
688 : &c->line_record[c->total_lines - 1];
689 gcc_assert (i->line_num <= line_num);
691 else
693 /* So the file had more lines than our line record
694 size. Thus the number of lines we've recorded has
695 been scaled down to fcache_line_reacord_size. Let's
696 pick the start/end of the recorded line that is
697 closest to line_num. */
698 size_t n = (line_num <= c->total_lines)
699 ? line_num * fcache_line_record_size / c->total_lines
700 : c ->line_record.length () - 1;
701 if (n < c->line_record.length ())
703 i = &c->line_record[n];
704 gcc_assert (i->line_num <= line_num);
708 if (i && i->line_num == line_num)
710 /* We have the start/end of the line. */
711 *line = c->data + i->start_pos;
712 *line_len = i->end_pos - i->start_pos;
713 return true;
716 if (i)
718 c->line_start_idx = i->start_pos;
719 c->line_num = i->line_num - 1;
721 else
723 c->line_start_idx = 0;
724 c->line_num = 0;
729 /* Let's walk from line c->line_num up to line_num - 1, without
730 copying any line. */
731 while (c->line_num < line_num - 1)
732 if (!goto_next_line (c))
733 return false;
735 /* The line we want is the next one. Let's read and copy it back to
736 the caller. */
737 return get_next_line (c, line, line_len);
740 /* Return the physical source line that corresponds to FILE_PATH/LINE.
741 The line is not nul-terminated. The returned pointer is only
742 valid until the next call of location_get_source_line.
743 Note that the line can contain several null characters,
744 so LINE_LEN, if non-null, points to the actual length of the line.
745 If the function fails, NULL is returned. */
747 const char *
748 location_get_source_line (const char *file_path, int line,
749 int *line_len)
751 char *buffer = NULL;
752 ssize_t len;
754 if (line == 0)
755 return NULL;
757 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
758 if (c == NULL)
759 return NULL;
761 bool read = read_line_num (c, line, &buffer, &len);
763 if (read && line_len)
764 *line_len = len;
766 return read ? buffer : NULL;
769 /* Determine if FILE_PATH missing a trailing newline on its final line.
770 Only valid to call once all of the file has been loaded, by
771 requesting a line number beyond the end of the file. */
773 bool
774 location_missing_trailing_newline (const char *file_path)
776 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
777 if (c == NULL)
778 return false;
780 return c->missing_trailing_newline;
783 /* Test if the location originates from the spelling location of a
784 builtin-tokens. That is, return TRUE if LOC is a (possibly
785 virtual) location of a built-in token that appears in the expansion
786 list of a macro. Please note that this function also works on
787 tokens that result from built-in tokens. For instance, the
788 function would return true if passed a token "4" that is the result
789 of the expansion of the built-in __LINE__ macro. */
790 bool
791 is_location_from_builtin_token (source_location loc)
793 const line_map_ordinary *map = NULL;
794 loc = linemap_resolve_location (line_table, loc,
795 LRK_SPELLING_LOCATION, &map);
796 return loc == BUILTINS_LOCATION;
799 /* Expand the source location LOC into a human readable location. If
800 LOC is virtual, it resolves to the expansion point of the involved
801 macro. If LOC resolves to a builtin location, the file name of the
802 readable location is set to the string "<built-in>". */
804 expanded_location
805 expand_location (source_location loc)
807 return expand_location_1 (loc, /*expansion_point_p=*/true,
808 LOCATION_ASPECT_CARET);
811 /* Expand the source location LOC into a human readable location. If
812 LOC is virtual, it resolves to the expansion location of the
813 relevant macro. If LOC resolves to a builtin location, the file
814 name of the readable location is set to the string
815 "<built-in>". */
817 expanded_location
818 expand_location_to_spelling_point (source_location loc)
820 return expand_location_1 (loc, /*expansion_point_p=*/false,
821 LOCATION_ASPECT_CARET);
824 /* The rich_location class within libcpp requires a way to expand
825 source_location instances, and relies on the client code
826 providing a symbol named
827 linemap_client_expand_location_to_spelling_point
828 to do this.
830 This is the implementation for libcommon.a (all host binaries),
831 which simply calls into expand_location_1. */
833 expanded_location
834 linemap_client_expand_location_to_spelling_point (source_location loc,
835 enum location_aspect aspect)
837 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
841 /* If LOCATION is in a system header and if it is a virtual location for
842 a token coming from the expansion of a macro, unwind it to the
843 location of the expansion point of the macro. Otherwise, just return
844 LOCATION.
846 This is used for instance when we want to emit diagnostics about a
847 token that may be located in a macro that is itself defined in a
848 system header, for example, for the NULL macro. In such a case, if
849 LOCATION were passed directly to diagnostic functions such as
850 warning_at, the diagnostic would be suppressed (unless
851 -Wsystem-headers). */
853 source_location
854 expansion_point_location_if_in_system_header (source_location location)
856 if (in_system_header_at (location))
857 location = linemap_resolve_location (line_table, location,
858 LRK_MACRO_EXPANSION_POINT,
859 NULL);
860 return location;
863 /* If LOCATION is a virtual location for a token coming from the expansion
864 of a macro, unwind to the location of the expansion point of the macro. */
866 source_location
867 expansion_point_location (source_location location)
869 return linemap_resolve_location (line_table, location,
870 LRK_MACRO_EXPANSION_POINT, NULL);
873 /* Construct a location with caret at CARET, ranging from START to
874 finish e.g.
876 11111111112
877 12345678901234567890
879 523 return foo + bar;
880 ~~~~^~~~~
883 The location's caret is at the "+", line 523 column 15, but starts
884 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
885 of "bar" at column 19. */
887 location_t
888 make_location (location_t caret, location_t start, location_t finish)
890 location_t pure_loc = get_pure_location (caret);
891 source_range src_range;
892 src_range.m_start = get_start (start);
893 src_range.m_finish = get_finish (finish);
894 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
895 pure_loc,
896 src_range,
897 NULL);
898 return combined_loc;
#define ONE_K 1024
#define ONE_M (ONE_K * ONE_K)

/* Display a number as an integer multiple of either:
   - 1024, if said integer is >= to 10 K (in base 2)
   - 1024 * 1024, if said integer is >= 10 M in (base 2)
   Smaller numbers are displayed unchanged.  Note that X is evaluated
   several times.  */
#define SCALE(x) ((unsigned long) ((x) < 10 * ONE_K \
                  ? (x) \
                  : ((x) < 10 * ONE_M \
                     ? (x) / ONE_K \
                     : (x) / ONE_M)))

/* For a given integer, display either:
   - the character 'k', if the number is higher than 10 K (in base 2)
     but strictly lower than 10 M (in base 2)
   - the character 'M' if the number is higher than 10 M (in base2)
   - the character ' ' if the number is strictly lower than 10 K  */
#define STAT_LABEL(x) ((x) < 10 * ONE_K ? ' ' : ((x) < 10 * ONE_M ? 'k' : 'M'))

/* Display an integer amount as multiple of 1K or 1M (in base 2).
   Display the correct unit (either k, M, or ' ') after the amount, as
   well.  This expands to two comma-separated arguments.  */
#define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
926 /* Dump statistics to stderr about the memory usage of the line_table
927 set of line maps. This also displays some statistics about macro
928 expansion. */
930 void
931 dump_line_table_statistics (void)
933 struct linemap_stats s;
934 long total_used_map_size,
935 macro_maps_size,
936 total_allocated_map_size;
938 memset (&s, 0, sizeof (s));
940 linemap_get_statistics (line_table, &s);
942 macro_maps_size = s.macro_maps_used_size
943 + s.macro_maps_locations_size;
945 total_allocated_map_size = s.ordinary_maps_allocated_size
946 + s.macro_maps_allocated_size
947 + s.macro_maps_locations_size;
949 total_used_map_size = s.ordinary_maps_used_size
950 + s.macro_maps_used_size
951 + s.macro_maps_locations_size;
953 fprintf (stderr, "Number of expanded macros: %5ld\n",
954 s.num_expanded_macros);
955 if (s.num_expanded_macros != 0)
956 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
957 s.num_macro_tokens / s.num_expanded_macros);
958 fprintf (stderr,
959 "\nLine Table allocations during the "
960 "compilation process\n");
961 fprintf (stderr, "Number of ordinary maps used: %5ld%c\n",
962 SCALE (s.num_ordinary_maps_used),
963 STAT_LABEL (s.num_ordinary_maps_used));
964 fprintf (stderr, "Ordinary map used size: %5ld%c\n",
965 SCALE (s.ordinary_maps_used_size),
966 STAT_LABEL (s.ordinary_maps_used_size));
967 fprintf (stderr, "Number of ordinary maps allocated: %5ld%c\n",
968 SCALE (s.num_ordinary_maps_allocated),
969 STAT_LABEL (s.num_ordinary_maps_allocated));
970 fprintf (stderr, "Ordinary maps allocated size: %5ld%c\n",
971 SCALE (s.ordinary_maps_allocated_size),
972 STAT_LABEL (s.ordinary_maps_allocated_size));
973 fprintf (stderr, "Number of macro maps used: %5ld%c\n",
974 SCALE (s.num_macro_maps_used),
975 STAT_LABEL (s.num_macro_maps_used));
976 fprintf (stderr, "Macro maps used size: %5ld%c\n",
977 SCALE (s.macro_maps_used_size),
978 STAT_LABEL (s.macro_maps_used_size));
979 fprintf (stderr, "Macro maps locations size: %5ld%c\n",
980 SCALE (s.macro_maps_locations_size),
981 STAT_LABEL (s.macro_maps_locations_size));
982 fprintf (stderr, "Macro maps size: %5ld%c\n",
983 SCALE (macro_maps_size),
984 STAT_LABEL (macro_maps_size));
985 fprintf (stderr, "Duplicated maps locations size: %5ld%c\n",
986 SCALE (s.duplicated_macro_maps_locations_size),
987 STAT_LABEL (s.duplicated_macro_maps_locations_size));
988 fprintf (stderr, "Total allocated maps size: %5ld%c\n",
989 SCALE (total_allocated_map_size),
990 STAT_LABEL (total_allocated_map_size));
991 fprintf (stderr, "Total used maps size: %5ld%c\n",
992 SCALE (total_used_map_size),
993 STAT_LABEL (total_used_map_size));
994 fprintf (stderr, "Ad-hoc table size: %5ld%c\n",
995 SCALE (s.adhoc_table_size),
996 STAT_LABEL (s.adhoc_table_size));
997 fprintf (stderr, "Ad-hoc table entries used: %5ld\n",
998 s.adhoc_table_entries_used);
999 fprintf (stderr, "optimized_ranges: %i\n",
1000 line_table->num_optimized_ranges);
1001 fprintf (stderr, "unoptimized_ranges: %i\n",
1002 line_table->num_unoptimized_ranges);
1004 fprintf (stderr, "\n");
1007 /* Get location one beyond the final location in ordinary map IDX. */
1009 static source_location
1010 get_end_location (struct line_maps *set, unsigned int idx)
1012 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1013 return set->highest_location;
1015 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1016 return MAP_START_LOCATION (next_map);
/* Helper function for write_digit_row.  Write the last decimal digit
   of DIGIT to STREAM.  */

static void
write_digit (FILE *stream, int digit)
{
  fputc ('0' + (digit % 10), stream);
}
1027 /* Helper function for dump_location_info.
1028 Write a row of numbers to STREAM, numbering a source line,
1029 giving the units, tens, hundreds etc of the column number. */
1031 static void
1032 write_digit_row (FILE *stream, int indent,
1033 const line_map_ordinary *map,
1034 source_location loc, int max_col, int divisor)
1036 fprintf (stream, "%*c", indent, ' ');
1037 fprintf (stream, "|");
1038 for (int column = 1; column < max_col; column++)
1040 source_location column_loc = loc + (column << map->m_range_bits);
1041 write_digit (stream, column_loc / divisor);
1043 fprintf (stream, "\n");
1046 /* Write a half-closed (START) / half-open (END) interval of
1047 source_location to STREAM. */
1049 static void
1050 dump_location_range (FILE *stream,
1051 source_location start, source_location end)
1053 fprintf (stream,
1054 " source_location interval: %u <= loc < %u\n",
1055 start, end);
1058 /* Write a labelled description of a half-closed (START) / half-open (END)
1059 interval of source_location to STREAM. */
1061 static void
1062 dump_labelled_location_range (FILE *stream,
1063 const char *name,
1064 source_location start, source_location end)
1066 fprintf (stream, "%s\n", name);
1067 dump_location_range (stream, start, end);
1068 fprintf (stream, "\n");
1071 /* Write a visualization of the locations in the line_table to STREAM. */
1073 void
1074 dump_location_info (FILE *stream)
1076 /* Visualize the reserved locations. */
1077 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1078 0, RESERVED_LOCATION_COUNT);
1080 /* Visualize the ordinary line_map instances, rendering the sources. */
1081 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1083 source_location end_location = get_end_location (line_table, idx);
1084 /* half-closed: doesn't include this one. */
1086 const line_map_ordinary *map
1087 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1088 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1089 dump_location_range (stream,
1090 MAP_START_LOCATION (map), end_location);
1091 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1092 fprintf (stream, " starting at line: %i\n",
1093 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1094 fprintf (stream, " column and range bits: %i\n",
1095 map->m_column_and_range_bits);
1096 fprintf (stream, " column bits: %i\n",
1097 map->m_column_and_range_bits - map->m_range_bits);
1098 fprintf (stream, " range bits: %i\n",
1099 map->m_range_bits);
1101 /* Render the span of source lines that this "map" covers. */
1102 for (source_location loc = MAP_START_LOCATION (map);
1103 loc < end_location;
1104 loc += (1 << map->m_range_bits) )
1106 gcc_assert (pure_location_p (line_table, loc) );
1108 expanded_location exploc
1109 = linemap_expand_location (line_table, map, loc);
1111 if (0 == exploc.column)
1113 /* Beginning of a new source line: draw the line. */
1115 int line_size;
1116 const char *line_text = location_get_source_line (exploc.file,
1117 exploc.line,
1118 &line_size);
1119 if (!line_text)
1120 break;
1121 fprintf (stream,
1122 "%s:%3i|loc:%5i|%.*s\n",
1123 exploc.file, exploc.line,
1124 loc,
1125 line_size, line_text);
1127 /* "loc" is at column 0, which means "the whole line".
1128 Render the locations *within* the line, by underlining
1129 it, showing the source_location numeric values
1130 at each column. */
1131 int max_col = (1 << map->m_column_and_range_bits) - 1;
1132 if (max_col > line_size)
1133 max_col = line_size + 1;
1135 int indent = 14 + strlen (exploc.file);
1137 /* Thousands. */
1138 if (end_location > 999)
1139 write_digit_row (stream, indent, map, loc, max_col, 1000);
1141 /* Hundreds. */
1142 if (end_location > 99)
1143 write_digit_row (stream, indent, map, loc, max_col, 100);
1145 /* Tens. */
1146 write_digit_row (stream, indent, map, loc, max_col, 10);
1148 /* Units. */
1149 write_digit_row (stream, indent, map, loc, max_col, 1);
1152 fprintf (stream, "\n");
1155 /* Visualize unallocated values. */
1156 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1157 line_table->highest_location,
1158 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1160 /* Visualize the macro line_map instances, rendering the sources. */
1161 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1163 /* Each macro map that is allocated owns source_location values
1164 that are *lower* that the one before them.
1165 Hence it's meaningful to view them either in order of ascending
1166 source locations, or in order of ascending macro map index. */
1167 const bool ascending_source_locations = true;
1168 unsigned int idx = (ascending_source_locations
1169 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1170 : i);
1171 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1172 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1173 idx,
1174 linemap_map_get_macro_name (map),
1175 MACRO_MAP_NUM_MACRO_TOKENS (map));
1176 dump_location_range (stream,
1177 map->start_location,
1178 (map->start_location
1179 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1180 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1181 "expansion point is location %i",
1182 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1183 fprintf (stream, " map->start_location: %u\n",
1184 map->start_location);
1186 fprintf (stream, " macro_locations:\n");
1187 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1189 source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1190 source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1192 /* linemap_add_macro_token encodes token numbers in an expansion
1193 by putting them after MAP_START_LOCATION. */
1195 /* I'm typically seeing 4 uninitialized entries at the end of
1196 0xafafafaf.
1197 This appears to be due to macro.c:replace_args
1198 adding 2 extra args for padding tokens; presumably there may
1199 be a leading and/or trailing padding token injected,
1200 each for 2 more location slots.
1201 This would explain there being up to 4 source_locations slots
1202 that may be uninitialized. */
1204 fprintf (stream, " %u: %u, %u\n",
1208 if (x == y)
1210 if (x < MAP_START_LOCATION (map))
1211 inform (x, "token %u has x-location == y-location == %u", i, x);
1212 else
1213 fprintf (stream,
1214 "x-location == y-location == %u encodes token # %u\n",
1215 x, x - MAP_START_LOCATION (map));
1217 else
1219 inform (x, "token %u has x-location == %u", i, x);
1220 inform (x, "token %u has y-location == %u", i, y);
1223 fprintf (stream, "\n");
1226 /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1227 macro map, presumably due to an off-by-one error somewhere
1228 between the logic in linemap_enter_macro and
1229 LINEMAPS_MACRO_LOWEST_LOCATION. */
1230 dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1231 MAX_SOURCE_LOCATION,
1232 MAX_SOURCE_LOCATION + 1);
1234 /* Visualize ad-hoc values. */
1235 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1236 MAX_SOURCE_LOCATION + 1, UINT_MAX);
1239 /* string_concat's constructor. */
1241 string_concat::string_concat (int num, location_t *locs)
1242 : m_num (num)
1244 m_locs = ggc_vec_alloc <location_t> (num);
1245 for (int i = 0; i < num; i++)
1246 m_locs[i] = locs[i];
1249 /* string_concat_db's constructor. */
1251 string_concat_db::string_concat_db ()
1253 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1256 /* Record that a string concatenation occurred, covering NUM
1257 string literal tokens. LOCS is an array of size NUM, containing the
1258 locations of the tokens. A copy of LOCS is taken. */
1260 void
1261 string_concat_db::record_string_concatenation (int num, location_t *locs)
1263 gcc_assert (num > 1);
1264 gcc_assert (locs);
1266 location_t key_loc = get_key_loc (locs[0]);
1268 string_concat *concat
1269 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1270 m_table->put (key_loc, concat);
1273 /* Determine if LOC was the location of the the initial token of a
1274 concatenation of string literal tokens.
1275 If so, *OUT_NUM is written to with the number of tokens, and
1276 *OUT_LOCS with the location of an array of locations of the
1277 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1278 storage owned by the string_concat_db.
1279 Otherwise, return false. */
1281 bool
1282 string_concat_db::get_string_concatenation (location_t loc,
1283 int *out_num,
1284 location_t **out_locs)
1286 gcc_assert (out_num);
1287 gcc_assert (out_locs);
1289 location_t key_loc = get_key_loc (loc);
1291 string_concat **concat = m_table->get (key_loc);
1292 if (!concat)
1293 return false;
1295 *out_num = (*concat)->m_num;
1296 *out_locs =(*concat)->m_locs;
1297 return true;
1300 /* Internal function. Canonicalize LOC into a form suitable for
1301 use as a key within the database, stripping away macro expansion,
1302 ad-hoc information, and range information, using the location of
1303 the start of LOC within an ordinary linemap. */
1305 location_t
1306 string_concat_db::get_key_loc (location_t loc)
1308 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1309 NULL);
1311 loc = get_range_from_loc (line_table, loc).m_start;
1313 return loc;
1316 /* Helper class for use within get_substring_ranges_for_loc.
1317 An vec of cpp_string with responsibility for releasing all of the
1318 str->text for each str in the vector. */
1320 class auto_cpp_string_vec : public auto_vec <cpp_string>
1322 public:
1323 auto_cpp_string_vec (int alloc)
1324 : auto_vec <cpp_string> (alloc) {}
1326 ~auto_cpp_string_vec ()
1328 /* Clean up the copies within this vec. */
1329 int i;
1330 cpp_string *str;
1331 FOR_EACH_VEC_ELT (*this, i, str)
1332 free (const_cast <unsigned char *> (str->text));
1336 /* Attempt to populate RANGES with source location information on the
1337 individual characters within the string literal found at STRLOC.
1338 If CONCATS is non-NULL, then any string literals that the token at
1339 STRLOC was concatenated with are also added to RANGES.
1341 Return NULL if successful, or an error message if any errors occurred (in
1342 which case RANGES may be only partially populated and should not
1343 be used).
1345 This is implemented by re-parsing the relevant source line(s). */
1347 static const char *
1348 get_substring_ranges_for_loc (cpp_reader *pfile,
1349 string_concat_db *concats,
1350 location_t strloc,
1351 enum cpp_ttype type,
1352 cpp_substring_ranges &ranges)
1354 gcc_assert (pfile);
1356 if (strloc == UNKNOWN_LOCATION)
1357 return "unknown location";
1359 /* Reparsing the strings requires accurate location information.
1360 If -ftrack-macro-expansion has been overridden from its default
1361 of 2, then we might have a location of a macro expansion point,
1362 rather than the location of the literal itself.
1363 Avoid this by requiring that we have full macro expansion tracking
1364 for substring locations to be available. */
1365 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1366 return "track_macro_expansion != 2";
1368 /* If #line or # 44 "file"-style directives are present, then there's
1369 no guarantee that the line numbers we have can be used to locate
1370 the strings. For example, we might have a .i file with # directives
1371 pointing back to lines within a .c file, but the .c file might
1372 have been edited since the .i file was created.
1373 In such a case, the safest course is to disable on-demand substring
1374 locations. */
1375 if (line_table->seen_line_directive)
1376 return "seen line directive";
1378 /* If string concatenation has occurred at STRLOC, get the locations
1379 of all of the literal tokens making up the compound string.
1380 Otherwise, just use STRLOC. */
1381 int num_locs = 1;
1382 location_t *strlocs = &strloc;
1383 if (concats)
1384 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1386 auto_cpp_string_vec strs (num_locs);
1387 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1388 for (int i = 0; i < num_locs; i++)
1390 /* Get range of strloc. We will use it to locate the start and finish
1391 of the literal token within the line. */
1392 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1394 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1395 /* If the string is within a macro expansion, we can't get at the
1396 end location. */
1397 return "macro expansion";
1399 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1400 /* If so, we can't reliably determine where the token started within
1401 its line. */
1402 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1404 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1405 /* If so, we can't reliably determine where the token finished within
1406 its line. */
1407 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1409 expanded_location start
1410 = expand_location_to_spelling_point (src_range.m_start);
1411 expanded_location finish
1412 = expand_location_to_spelling_point (src_range.m_finish);
1413 if (start.file != finish.file)
1414 return "range endpoints are in different files";
1415 if (start.line != finish.line)
1416 return "range endpoints are on different lines";
1417 if (start.column > finish.column)
1418 return "range endpoints are reversed";
1420 int line_width;
1421 const char *line = location_get_source_line (start.file, start.line,
1422 &line_width);
1423 if (line == NULL)
1424 return "unable to read source line";
1426 /* Determine the location of the literal (including quotes
1427 and leading prefix chars, such as the 'u' in a u""
1428 token). */
1429 const char *literal = line + start.column - 1;
1430 int literal_length = finish.column - start.column + 1;
1432 /* Ensure that we don't crash if we got the wrong location. */
1433 if (line_width < (start.column - 1 + literal_length))
1434 return "line is not wide enough";
1436 cpp_string from;
1437 from.len = literal_length;
1438 /* Make a copy of the literal, to avoid having to rely on
1439 the lifetime of the copy of the line within the cache.
1440 This will be released by the auto_cpp_string_vec dtor. */
1441 from.text = XDUPVEC (unsigned char, literal, literal_length);
1442 strs.safe_push (from);
1444 /* For very long lines, a new linemap could have started
1445 halfway through the token.
1446 Ensure that the loc_reader uses the linemap of the
1447 *end* of the token for its start location. */
1448 const line_map_ordinary *final_ord_map;
1449 linemap_resolve_location (line_table, src_range.m_finish,
1450 LRK_MACRO_EXPANSION_POINT, &final_ord_map);
1451 location_t start_loc
1452 = linemap_position_for_line_and_column (line_table, final_ord_map,
1453 start.line, start.column);
1455 cpp_string_location_reader loc_reader (start_loc, line_table);
1456 loc_readers.safe_push (loc_reader);
1459 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1460 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1461 loc_readers.address (),
1462 num_locs, &ranges, type);
1463 if (err)
1464 return err;
1466 /* Success: "ranges" should now contain information on the string. */
1467 return NULL;
1470 /* Attempt to populate *OUT_LOC with source location information on the
1471 given characters within the string literal found at STRLOC.
1472 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1473 character set.
1475 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1476 and string literal "012345\n789"
1477 *OUT_LOC is written to with:
1478 "012345\n789"
1479 ~^~~~~
1481 If CONCATS is non-NULL, then any string literals that the token at
1482 STRLOC was concatenated with are also considered.
1484 This is implemented by re-parsing the relevant source line(s).
1486 Return NULL if successful, or an error message if any errors occurred.
1487 Error messages are intended for GCC developers (to help debugging) rather
1488 than for end-users. */
1490 const char *
1491 get_source_location_for_substring (cpp_reader *pfile,
1492 string_concat_db *concats,
1493 location_t strloc,
1494 enum cpp_ttype type,
1495 int caret_idx, int start_idx, int end_idx,
1496 source_location *out_loc)
1498 gcc_checking_assert (caret_idx >= 0);
1499 gcc_checking_assert (start_idx >= 0);
1500 gcc_checking_assert (end_idx >= 0);
1501 gcc_assert (out_loc);
1503 cpp_substring_ranges ranges;
1504 const char *err
1505 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1506 if (err)
1507 return err;
1509 if (caret_idx >= ranges.get_num_ranges ())
1510 return "caret_idx out of range";
1511 if (start_idx >= ranges.get_num_ranges ())
1512 return "start_idx out of range";
1513 if (end_idx >= ranges.get_num_ranges ())
1514 return "end_idx out of range";
1516 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1517 ranges.get_range (start_idx).m_start,
1518 ranges.get_range (end_idx).m_finish);
1519 return NULL;
1522 #if CHECKING_P
1524 namespace selftest {
1526 /* Selftests of location handling. */
1528 /* Attempt to populate *OUT_RANGE with source location information on the
1529 given character within the string literal found at STRLOC.
1530 CHAR_IDX refers to an offset within the execution character set.
1531 If CONCATS is non-NULL, then any string literals that the token at
1532 STRLOC was concatenated with are also considered.
1534 This is implemented by re-parsing the relevant source line(s).
1536 Return NULL if successful, or an error message if any errors occurred.
1537 Error messages are intended for GCC developers (to help debugging) rather
1538 than for end-users. */
1540 static const char *
1541 get_source_range_for_char (cpp_reader *pfile,
1542 string_concat_db *concats,
1543 location_t strloc,
1544 enum cpp_ttype type,
1545 int char_idx,
1546 source_range *out_range)
1548 gcc_checking_assert (char_idx >= 0);
1549 gcc_assert (out_range);
1551 cpp_substring_ranges ranges;
1552 const char *err
1553 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1554 if (err)
1555 return err;
1557 if (char_idx >= ranges.get_num_ranges ())
1558 return "char_idx out of range";
1560 *out_range = ranges.get_range (char_idx);
1561 return NULL;
1564 /* As get_source_range_for_char, but write to *OUT the number
1565 of ranges that are available. */
1567 static const char *
1568 get_num_source_ranges_for_substring (cpp_reader *pfile,
1569 string_concat_db *concats,
1570 location_t strloc,
1571 enum cpp_ttype type,
1572 int *out)
1574 gcc_assert (out);
1576 cpp_substring_ranges ranges;
1577 const char *err
1578 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1580 if (err)
1581 return err;
1583 *out = ranges.get_num_ranges ();
1584 return NULL;
1587 /* Selftests of location handling. */
1589 /* Helper function for verifying location data: when location_t
1590 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1591 as having column 0. */
1593 static bool
1594 should_have_column_data_p (location_t loc)
1596 if (IS_ADHOC_LOC (loc))
1597 loc = get_location_from_adhoc_loc (line_table, loc);
1598 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1599 return false;
1600 return true;
1603 /* Selftest for should_have_column_data_p. */
1605 static void
1606 test_should_have_column_data_p ()
1608 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1609 ASSERT_TRUE
1610 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1611 ASSERT_FALSE
1612 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1615 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1616 on LOC. */
1618 static void
1619 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1620 location_t loc)
1622 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1623 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1624 /* If location_t values are sufficiently high, then column numbers
1625 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1626 When close to the threshold, column numbers *may* be present: if
1627 the final linemap before the threshold contains a line that straddles
1628 the threshold, locations in that line have column information. */
1629 if (should_have_column_data_p (loc))
1630 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for source_location/location_t beyond which line-map.c
     changes behavior (disabling of the range-packing optimization,
     disabling of column-tracking).  We can exercise these by starting
     the line_table at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix.  */

struct line_table_case
{
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* Value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the value at which the line_table's highest_location
     and highest_line are started.  */
  int m_base_location;
};
1660 /* Constructor. Store the old value of line_table, and create a new
1661 one, using sane defaults. */
1663 line_table_test::line_table_test ()
1665 gcc_assert (saved_line_table == NULL);
1666 saved_line_table = line_table;
1667 line_table = ggc_alloc<line_maps> ();
1668 linemap_init (line_table, BUILTINS_LOCATION);
1669 gcc_assert (saved_line_table->reallocator);
1670 line_table->reallocator = saved_line_table->reallocator;
1671 gcc_assert (saved_line_table->round_alloc_size);
1672 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1673 line_table->default_range_bits = 0;
1676 /* Constructor. Store the old value of line_table, and create a new
1677 one, using the sitation described in CASE_. */
1679 line_table_test::line_table_test (const line_table_case &case_)
1681 gcc_assert (saved_line_table == NULL);
1682 saved_line_table = line_table;
1683 line_table = ggc_alloc<line_maps> ();
1684 linemap_init (line_table, BUILTINS_LOCATION);
1685 gcc_assert (saved_line_table->reallocator);
1686 line_table->reallocator = saved_line_table->reallocator;
1687 gcc_assert (saved_line_table->round_alloc_size);
1688 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1689 line_table->default_range_bits = case_.m_default_range_bits;
1690 if (case_.m_base_location)
1692 line_table->highest_location = case_.m_base_location;
1693 line_table->highest_line = case_.m_base_location;
1697 /* Destructor. Restore the old value of line_table. */
1699 line_table_test::~line_table_test ()
1701 gcc_assert (saved_line_table != NULL);
1702 line_table = saved_line_table;
1703 saved_line_table = NULL;
1706 /* Verify basic operation of ordinary linemaps. */
1708 static void
1709 test_accessing_ordinary_linemaps (const line_table_case &case_)
1711 line_table_test ltt (case_);
1713 /* Build a simple linemap describing some locations. */
1714 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1716 linemap_line_start (line_table, 1, 100);
1717 location_t loc_a = linemap_position_for_column (line_table, 1);
1718 location_t loc_b = linemap_position_for_column (line_table, 23);
1720 linemap_line_start (line_table, 2, 100);
1721 location_t loc_c = linemap_position_for_column (line_table, 1);
1722 location_t loc_d = linemap_position_for_column (line_table, 17);
1724 /* Example of a very long line. */
1725 linemap_line_start (line_table, 3, 2000);
1726 location_t loc_e = linemap_position_for_column (line_table, 700);
1728 /* Transitioning back to a short line. */
1729 linemap_line_start (line_table, 4, 0);
1730 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1732 if (should_have_column_data_p (loc_back_to_short))
1734 /* Verify that we switched to short lines in the linemap. */
1735 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1736 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1739 /* Example of a line that will eventually be seen to be longer
1740 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1741 below that. */
1742 linemap_line_start (line_table, 5, 2000);
1744 location_t loc_start_of_very_long_line
1745 = linemap_position_for_column (line_table, 2000);
1746 location_t loc_too_wide
1747 = linemap_position_for_column (line_table, 4097);
1748 location_t loc_too_wide_2
1749 = linemap_position_for_column (line_table, 4098);
1751 /* ...and back to a sane line length. */
1752 linemap_line_start (line_table, 6, 100);
1753 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1755 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1757 /* Multiple files. */
1758 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1759 linemap_line_start (line_table, 1, 200);
1760 location_t loc_f = linemap_position_for_column (line_table, 150);
1761 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1763 /* Verify that we can recover the location info. */
1764 assert_loceq ("foo.c", 1, 1, loc_a);
1765 assert_loceq ("foo.c", 1, 23, loc_b);
1766 assert_loceq ("foo.c", 2, 1, loc_c);
1767 assert_loceq ("foo.c", 2, 17, loc_d);
1768 assert_loceq ("foo.c", 3, 700, loc_e);
1769 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1771 /* In the very wide line, the initial location should be fully tracked. */
1772 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1773 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1774 be disabled. */
1775 assert_loceq ("foo.c", 5, 0, loc_too_wide);
1776 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1777 /*...and column-tracking should be re-enabled for subsequent lines. */
1778 assert_loceq ("foo.c", 6, 10, loc_sane_again);
1780 assert_loceq ("bar.c", 1, 150, loc_f);
1782 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1783 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1785 /* Verify using make_location to build a range, and extracting data
1786 back from it. */
1787 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1788 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1789 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1790 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1791 ASSERT_EQ (loc_b, src_range.m_start);
1792 ASSERT_EQ (loc_d, src_range.m_finish);
1795 /* Verify various properties of UNKNOWN_LOCATION. */
1797 static void
1798 test_unknown_location ()
1800 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1801 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1802 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1805 /* Verify various properties of BUILTINS_LOCATION. */
1807 static void
1808 test_builtins ()
1810 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1811 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1814 /* Regression test for make_location.
1815 Ensure that we use pure locations for the start/finish of the range,
1816 rather than storing a packed or ad-hoc range as the start/finish. */
1818 static void
1819 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1821 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1822 with C++ frontend.
1823 ....................0000000001111111111222.
1824 ....................1234567890123456789012. */
1825 const char *content = " r += !aaa == bbb;\n";
1826 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1827 line_table_test ltt (case_);
1828 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1830 const location_t c11 = linemap_position_for_column (line_table, 11);
1831 const location_t c12 = linemap_position_for_column (line_table, 12);
1832 const location_t c13 = linemap_position_for_column (line_table, 13);
1833 const location_t c14 = linemap_position_for_column (line_table, 14);
1834 const location_t c21 = linemap_position_for_column (line_table, 21);
1836 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1837 return;
1839 /* Use column 13 for the caret location, arbitrarily, to verify that we
1840 handle start != caret. */
1841 const location_t aaa = make_location (c13, c12, c14);
1842 ASSERT_EQ (c13, get_pure_location (aaa));
1843 ASSERT_EQ (c12, get_start (aaa));
1844 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1845 ASSERT_EQ (c14, get_finish (aaa));
1846 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1848 /* Make a location using a location with a range as the start-point. */
1849 const location_t not_aaa = make_location (c11, aaa, c14);
1850 ASSERT_EQ (c11, get_pure_location (not_aaa));
1851 /* It should use the start location of the range, not store the range
1852 itself. */
1853 ASSERT_EQ (c12, get_start (not_aaa));
1854 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1855 ASSERT_EQ (c14, get_finish (not_aaa));
1856 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1858 /* Similarly, make a location with a range as the end-point. */
1859 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1860 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1861 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1862 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1863 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1864 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1865 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1866 /* It should use the finish location of the range, not store the range
1867 itself. */
1868 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1869 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1870 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1871 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1872 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1875 /* Verify reading of input files (e.g. for caret-based diagnostics). */
1877 static void
1878 test_reading_source_line ()
1880 /* Create a tempfile and write some text to it. */
1881 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1882 "01234567890123456789\n"
1883 "This is the test text\n"
1884 "This is the 3rd line");
1886 /* Read back a specific line from the tempfile. */
1887 int line_size;
1888 const char *source_line = location_get_source_line (tmp.get_filename (),
1889 3, &line_size);
1890 ASSERT_TRUE (source_line != NULL);
1891 ASSERT_EQ (20, line_size);
1892 ASSERT_TRUE (!strncmp ("This is the 3rd line",
1893 source_line, line_size));
1895 source_line = location_get_source_line (tmp.get_filename (),
1896 2, &line_size);
1897 ASSERT_TRUE (source_line != NULL);
1898 ASSERT_EQ (21, line_size);
1899 ASSERT_TRUE (!strncmp ("This is the test text",
1900 source_line, line_size));
1902 source_line = location_get_source_line (tmp.get_filename (),
1903 4, &line_size);
1904 ASSERT_TRUE (source_line == NULL);
1907 /* Tests of lexing. */
/* Verify that the result of cpp_token_as_text on token TOK from PARSER
   is equal, as a string, to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));    \
    ASSERT_STREQ ((EXPECTED_TEXT),                                      \
                  reinterpret_cast <const char *> (actual_txt));        \
  SELFTEST_END_STMT
1918 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1919 and ranges from EXP_START_COL to EXP_FINISH_COL.
1920 Use LOC as the effective location of the selftest. */
1922 static void
1923 assert_token_loc_eq (const location &loc,
1924 const cpp_token *tok,
1925 const char *exp_filename, int exp_linenum,
1926 int exp_start_col, int exp_finish_col)
1928 location_t tok_loc = tok->src_loc;
1929 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1930 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1932 /* If location_t values are sufficiently high, then column numbers
1933 will be unavailable. */
1934 if (!should_have_column_data_p (tok_loc))
1935 return;
1937 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1938 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1939 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1940 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
/* Call assert_token_loc_eq to verify TOK->src_loc against
   EXP_FILENAME, EXP_LINENUM, EXP_START_COL and EXP_FINISH_COL, with
   SELFTEST_LOCATION as the effective location of the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,     \
                            EXP_START_COL, EXP_FINISH_COL)      \
  assert_token_loc_eq (SELFTEST_LOCATION,                       \
                       (TOK),                                   \
                       (EXP_FILENAME),                          \
                       (EXP_LINENUM),                           \
                       (EXP_START_COL),                         \
                       (EXP_FINISH_COL))
1951 /* Test of lexing a file using libcpp, verifying tokens and their
1952 location information. */
1954 static void
1955 test_lexer (const line_table_case &case_)
1957 /* Create a tempfile and write some text to it. */
1958 const char *content =
1959 /*00000000011111111112222222222333333.3333444444444.455555555556
1960 12345678901234567890123456789012345.6789012345678.901234567890. */
1961 ("test_name /* c-style comment */\n"
1962 " \"test literal\"\n"
1963 " // test c++-style comment\n"
1964 " 42\n");
1965 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1967 line_table_test ltt (case_);
1969 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1971 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1972 ASSERT_NE (fname, NULL);
1974 /* Verify that we get the expected tokens back, with the correct
1975 location information. */
1977 location_t loc;
1978 const cpp_token *tok;
1979 tok = cpp_get_token_with_location (parser, &loc);
1980 ASSERT_NE (tok, NULL);
1981 ASSERT_EQ (tok->type, CPP_NAME);
1982 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
1983 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
1985 tok = cpp_get_token_with_location (parser, &loc);
1986 ASSERT_NE (tok, NULL);
1987 ASSERT_EQ (tok->type, CPP_STRING);
1988 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
1989 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
1991 tok = cpp_get_token_with_location (parser, &loc);
1992 ASSERT_NE (tok, NULL);
1993 ASSERT_EQ (tok->type, CPP_NUMBER);
1994 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
1995 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
1997 tok = cpp_get_token_with_location (parser, &loc);
1998 ASSERT_NE (tok, NULL);
1999 ASSERT_EQ (tok->type, CPP_EOF);
2001 cpp_finish (parser, NULL);
2002 cpp_destroy (parser);
/* Forward decls.  */

struct lexer_test;
class lexer_test_options;

/* A class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor.  */

class lexer_test_options
{
 public:
  virtual void apply (lexer_test &) = 0;
};
2019 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2020 in its dtor.
2022 This is needed by struct lexer_test to ensure that the cleanup of the
2023 cpp_reader happens *after* the cleanup of the temp_source_file. */
2025 class cpp_reader_ptr
2027 public:
2028 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2030 ~cpp_reader_ptr ()
2032 cpp_finish (m_ptr, NULL);
2033 cpp_destroy (m_ptr);
2036 operator cpp_reader * () const { return m_ptr; }
2038 private:
2039 cpp_reader *m_ptr;
2042 /* A struct for writing lexer tests. */
2044 struct lexer_test
2046 lexer_test (const line_table_case &case_, const char *content,
2047 lexer_test_options *options);
2048 ~lexer_test ();
2050 const cpp_token *get_token ();
2052 /* The ordering of these fields matters.
2053 The line_table_test must be first, since the cpp_reader_ptr
2054 uses it.
2055 The cpp_reader must be cleaned up *after* the temp_source_file
2056 since the filenames in input.c's input cache are owned by the
2057 cpp_reader; in particular, when ~temp_source_file evicts the
2058 filename the filenames must still be alive. */
2059 line_table_test m_ltt;
2060 cpp_reader_ptr m_parser;
2061 temp_source_file m_tempfile;
2062 string_concat_db m_concats;
2063 bool m_implicitly_expect_EOF;
2066 /* Use an EBCDIC encoding for the execution charset, specifically
2067 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2069 This exercises iconv integration within libcpp.
2070 Not every build of iconv supports the given charset,
2071 so we need to flag this error and handle it gracefully. */
2073 class ebcdic_execution_charset : public lexer_test_options
2075 public:
2076 ebcdic_execution_charset () : m_num_iconv_errors (0)
2078 gcc_assert (s_singleton == NULL);
2079 s_singleton = this;
2081 ~ebcdic_execution_charset ()
2083 gcc_assert (s_singleton == this);
2084 s_singleton = NULL;
2087 void apply (lexer_test &test) FINAL OVERRIDE
2089 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2090 cpp_opts->narrow_charset = "IBM1047";
2092 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2093 callbacks->error = on_error;
2096 static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
2097 int level ATTRIBUTE_UNUSED,
2098 int reason ATTRIBUTE_UNUSED,
2099 rich_location *richloc ATTRIBUTE_UNUSED,
2100 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2101 ATTRIBUTE_FPTR_PRINTF(5,0)
2103 gcc_assert (s_singleton);
2104 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2105 const char *msg = "conversion from %s to %s not supported by iconv";
2106 #ifdef ENABLE_NLS
2107 msg = dgettext ("cpplib", msg);
2108 #endif
2109 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2110 when the local iconv build doesn't support the conversion. */
2111 if (strcmp (msgid, msg) == 0)
2113 s_singleton->m_num_iconv_errors++;
2114 return true;
2117 /* Otherwise, we have an unexpected error. */
2118 abort ();
2121 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2123 private:
2124 static ebcdic_execution_charset *s_singleton;
2125 int m_num_iconv_errors;
2128 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2130 /* A lexer_test_options subclass that records a list of error
2131 messages emitted by the lexer. */
2133 class lexer_error_sink : public lexer_test_options
2135 public:
2136 lexer_error_sink ()
2138 gcc_assert (s_singleton == NULL);
2139 s_singleton = this;
2141 ~lexer_error_sink ()
2143 gcc_assert (s_singleton == this);
2144 s_singleton = NULL;
2146 int i;
2147 char *str;
2148 FOR_EACH_VEC_ELT (m_errors, i, str)
2149 free (str);
2152 void apply (lexer_test &test) FINAL OVERRIDE
2154 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2155 callbacks->error = on_error;
2158 static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
2159 int level ATTRIBUTE_UNUSED,
2160 int reason ATTRIBUTE_UNUSED,
2161 rich_location *richloc ATTRIBUTE_UNUSED,
2162 const char *msgid, va_list *ap)
2163 ATTRIBUTE_FPTR_PRINTF(5,0)
2165 char *msg = xvasprintf (msgid, *ap);
2166 s_singleton->m_errors.safe_push (msg);
2167 return true;
2170 auto_vec<char *> m_errors;
2172 private:
2173 static lexer_error_sink *s_singleton;
2176 lexer_error_sink *lexer_error_sink::s_singleton;
2178 /* Constructor. Override line_table with a new instance based on CASE_,
2179 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2180 start parsing the tempfile. */
2182 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2183 lexer_test_options *options)
2184 : m_ltt (case_),
2185 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2186 /* Create a tempfile and write the text to it. */
2187 m_tempfile (SELFTEST_LOCATION, ".c", content),
2188 m_concats (),
2189 m_implicitly_expect_EOF (true)
2191 if (options)
2192 options->apply (*this);
2194 cpp_init_iconv (m_parser);
2196 /* Parse the file. */
2197 const char *fname = cpp_read_main_file (m_parser,
2198 m_tempfile.get_filename ());
2199 ASSERT_NE (fname, NULL);
2202 /* Destructor. By default, verify that the next token in m_parser is EOF. */
2204 lexer_test::~lexer_test ()
2206 location_t loc;
2207 const cpp_token *tok;
2209 if (m_implicitly_expect_EOF)
2211 tok = cpp_get_token_with_location (m_parser, &loc);
2212 ASSERT_NE (tok, NULL);
2213 ASSERT_EQ (tok->type, CPP_EOF);
2217 /* Get the next token from m_parser. */
2219 const cpp_token *
2220 lexer_test::get_token ()
2222 location_t loc;
2223 const cpp_token *tok;
2225 tok = cpp_get_token_with_location (m_parser, &loc);
2226 ASSERT_NE (tok, NULL);
2227 return tok;
2230 /* Verify that locations within string literals are correctly handled. */
2232 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2233 using the string concatenation database for TEST.
2235 Assert that the character at index IDX is on EXPECTED_LINE,
2236 and that it begins at column EXPECTED_START_COL and ends at
2237 EXPECTED_FINISH_COL (unless the locations are beyond
2238 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2239 columns). */
2241 static void
2242 assert_char_at_range (const location &loc,
2243 lexer_test& test,
2244 location_t strloc, enum cpp_ttype type, int idx,
2245 int expected_line, int expected_start_col,
2246 int expected_finish_col)
2248 cpp_reader *pfile = test.m_parser;
2249 string_concat_db *concats = &test.m_concats;
2251 source_range actual_range = source_range();
2252 const char *err
2253 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2254 &actual_range);
2255 if (should_have_column_data_p (strloc))
2256 ASSERT_EQ_AT (loc, NULL, err);
2257 else
2259 ASSERT_STREQ_AT (loc,
2260 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2261 err);
2262 return;
2265 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2266 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2267 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2268 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2270 if (should_have_column_data_p (actual_range.m_start))
2272 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2273 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2275 if (should_have_column_data_p (actual_range.m_finish))
2277 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2278 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
/* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
   the effective location of any errors.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
                             EXPECTED_START_COL, EXPECTED_FINISH_COL)     \
  assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
                        (IDX), (EXPECTED_LINE), (EXPECTED_START_COL),     \
                        (EXPECTED_FINISH_COL))
2291 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2292 using the string concatenation database for TEST.
2294 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2296 static void
2297 assert_num_substring_ranges (const location &loc,
2298 lexer_test& test,
2299 location_t strloc,
2300 enum cpp_ttype type,
2301 int expected_num_ranges)
2303 cpp_reader *pfile = test.m_parser;
2304 string_concat_db *concats = &test.m_concats;
2306 int actual_num_ranges = -1;
2307 const char *err
2308 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2309 &actual_num_ranges);
2310 if (should_have_column_data_p (strloc))
2311 ASSERT_EQ_AT (loc, NULL, err);
2312 else
2314 ASSERT_STREQ_AT (loc,
2315 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2316 err);
2317 return;
2319 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
/* Macro for calling assert_num_substring_ranges, supplying
   SELFTEST_LOCATION for the effective location of any errors.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,           \
                                    EXPECTED_NUM_RANGES)                \
  assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
                               (TYPE), (EXPECTED_NUM_RANGES))
2331 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2332 returns an error (using the string concatenation database for TEST). */
2334 static void
2335 assert_has_no_substring_ranges (const location &loc,
2336 lexer_test& test,
2337 location_t strloc,
2338 enum cpp_ttype type,
2339 const char *expected_err)
2341 cpp_reader *pfile = test.m_parser;
2342 string_concat_db *concats = &test.m_concats;
2343 cpp_substring_ranges ranges;
2344 const char *actual_err
2345 = get_substring_ranges_for_loc (pfile, concats, strloc,
2346 type, ranges);
2347 if (should_have_column_data_p (strloc))
2348 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2349 else
2350 ASSERT_STREQ_AT (loc,
2351 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2352 actual_err);
/* Macro for calling assert_has_no_substring_ranges, supplying
   SELFTEST_LOCATION for the effective location of any errors.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)   \
    assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST),    \
                                    (STRLOC), (TYPE), (ERR))
2359 /* Lex a simple string literal. Verify the substring location data, before
2360 and after running cpp_interpret_string on it. */
2362 static void
2363 test_lexer_string_locations_simple (const line_table_case &case_)
2365 /* Digits 0-9 (with 0 at column 10), the simple way.
2366 ....................000000000.11111111112.2222222223333333333
2367 ....................123456789.01234567890.1234567890123456789
2368 We add a trailing comment to ensure that we correctly locate
2369 the end of the string literal token. */
2370 const char *content = " \"0123456789\" /* not a string */\n";
2371 lexer_test test (case_, content, NULL);
2373 /* Verify that we get the expected token back, with the correct
2374 location information. */
2375 const cpp_token *tok = test.get_token ();
2376 ASSERT_EQ (tok->type, CPP_STRING);
2377 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2378 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2380 /* At this point in lexing, the quote characters are treated as part of
2381 the string (they are stripped off by cpp_interpret_string). */
2383 ASSERT_EQ (tok->val.str.len, 12);
2385 /* Verify that cpp_interpret_string works. */
2386 cpp_string dst_string;
2387 const enum cpp_ttype type = CPP_STRING;
2388 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2389 &dst_string, type);
2390 ASSERT_TRUE (result);
2391 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2392 free (const_cast <unsigned char *> (dst_string.text));
2394 /* Verify ranges of individual characters. This no longer includes the
2395 opening quote, but does include the closing quote. */
2396 for (int i = 0; i <= 10; i++)
2397 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2398 10 + i, 10 + i);
2400 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2403 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2404 encoding. */
2406 static void
2407 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2409 /* EBCDIC support requires iconv. */
2410 if (!HAVE_ICONV)
2411 return;
2413 /* Digits 0-9 (with 0 at column 10), the simple way.
2414 ....................000000000.11111111112.2222222223333333333
2415 ....................123456789.01234567890.1234567890123456789
2416 We add a trailing comment to ensure that we correctly locate
2417 the end of the string literal token. */
2418 const char *content = " \"0123456789\" /* not a string */\n";
2419 ebcdic_execution_charset use_ebcdic;
2420 lexer_test test (case_, content, &use_ebcdic);
2422 /* Verify that we get the expected token back, with the correct
2423 location information. */
2424 const cpp_token *tok = test.get_token ();
2425 ASSERT_EQ (tok->type, CPP_STRING);
2426 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2427 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2429 /* At this point in lexing, the quote characters are treated as part of
2430 the string (they are stripped off by cpp_interpret_string). */
2432 ASSERT_EQ (tok->val.str.len, 12);
2434 /* The remainder of the test requires an iconv implementation that
2435 can convert from UTF-8 to the EBCDIC encoding requested above. */
2436 if (use_ebcdic.iconv_errors_occurred_p ())
2437 return;
2439 /* Verify that cpp_interpret_string works. */
2440 cpp_string dst_string;
2441 const enum cpp_ttype type = CPP_STRING;
2442 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2443 &dst_string, type);
2444 ASSERT_TRUE (result);
2445 /* We should now have EBCDIC-encoded text, specifically
2446 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2447 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2448 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2449 (const char *)dst_string.text);
2450 free (const_cast <unsigned char *> (dst_string.text));
2452 /* Verify that we don't attempt to record substring location information
2453 for such cases. */
2454 ASSERT_HAS_NO_SUBSTRING_RANGES
2455 (test, tok->src_loc, type,
2456 "execution character set != source character set");
2459 /* Lex a string literal containing a hex-escaped character.
2460 Verify the substring location data, before and after running
2461 cpp_interpret_string on it. */
2463 static void
2464 test_lexer_string_locations_hex (const line_table_case &case_)
2466 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2467 and with a space in place of digit 6, to terminate the escaped
2468 hex code.
2469 ....................000000000.111111.11112222.
2470 ....................123456789.012345.67890123. */
2471 const char *content = " \"01234\\x35 789\"\n";
2472 lexer_test test (case_, content, NULL);
2474 /* Verify that we get the expected token back, with the correct
2475 location information. */
2476 const cpp_token *tok = test.get_token ();
2477 ASSERT_EQ (tok->type, CPP_STRING);
2478 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2479 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2481 /* At this point in lexing, the quote characters are treated as part of
2482 the string (they are stripped off by cpp_interpret_string). */
2483 ASSERT_EQ (tok->val.str.len, 15);
2485 /* Verify that cpp_interpret_string works. */
2486 cpp_string dst_string;
2487 const enum cpp_ttype type = CPP_STRING;
2488 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2489 &dst_string, type);
2490 ASSERT_TRUE (result);
2491 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2492 free (const_cast <unsigned char *> (dst_string.text));
2494 /* Verify ranges of individual characters. This no longer includes the
2495 opening quote, but does include the closing quote. */
2496 for (int i = 0; i <= 4; i++)
2497 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2498 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2499 for (int i = 6; i <= 10; i++)
2500 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2502 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2505 /* Lex a string literal containing an octal-escaped character.
2506 Verify the substring location data after running cpp_interpret_string
2507 on it. */
2509 static void
2510 test_lexer_string_locations_oct (const line_table_case &case_)
2512 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2513 and with a space in place of digit 6, to terminate the escaped
2514 octal code.
2515 ....................000000000.111111.11112222.2222223333333333444
2516 ....................123456789.012345.67890123.4567890123456789012 */
2517 const char *content = " \"01234\\065 789\" /* not a string */\n";
2518 lexer_test test (case_, content, NULL);
2520 /* Verify that we get the expected token back, with the correct
2521 location information. */
2522 const cpp_token *tok = test.get_token ();
2523 ASSERT_EQ (tok->type, CPP_STRING);
2524 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2526 /* Verify that cpp_interpret_string works. */
2527 cpp_string dst_string;
2528 const enum cpp_ttype type = CPP_STRING;
2529 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2530 &dst_string, type);
2531 ASSERT_TRUE (result);
2532 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2533 free (const_cast <unsigned char *> (dst_string.text));
2535 /* Verify ranges of individual characters. This no longer includes the
2536 opening quote, but does include the closing quote. */
2537 for (int i = 0; i < 5; i++)
2538 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2539 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2540 for (int i = 6; i <= 10; i++)
2541 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2543 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2546 /* Test of string literal containing letter escapes. */
2548 static void
2549 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2551 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2552 .....................000000000.1.11111.1.1.11222.22222223333333
2553 .....................123456789.0.12345.6.7.89012.34567890123456. */
2554 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2555 lexer_test test (case_, content, NULL);
2557 /* Verify that we get the expected tokens back. */
2558 const cpp_token *tok = test.get_token ();
2559 ASSERT_EQ (tok->type, CPP_STRING);
2560 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2562 /* Verify ranges of individual characters. */
2563 /* "\t". */
2564 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2565 0, 1, 10, 11);
2566 /* "foo". */
2567 for (int i = 1; i <= 3; i++)
2568 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2569 i, 1, 11 + i, 11 + i);
2570 /* "\\" and "\n". */
2571 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2572 4, 1, 15, 16);
2573 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2574 5, 1, 17, 18);
2576 /* "bar" and closing quote for nul-terminator. */
2577 for (int i = 6; i <= 9; i++)
2578 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2579 i, 1, 13 + i, 13 + i);
2581 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2584 /* Another test of a string literal containing a letter escape.
2585 Based on string seen in
2586 printf ("%-%\n");
2587 in gcc.dg/format/c90-printf-1.c. */
2589 static void
2590 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2592 /* .....................000000000.1111.11.1111.22222222223.
2593 .....................123456789.0123.45.6789.01234567890. */
2594 const char *content = (" \"%-%\\n\" /* non-str */\n");
2595 lexer_test test (case_, content, NULL);
2597 /* Verify that we get the expected tokens back. */
2598 const cpp_token *tok = test.get_token ();
2599 ASSERT_EQ (tok->type, CPP_STRING);
2600 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2602 /* Verify ranges of individual characters. */
2603 /* "%-%". */
2604 for (int i = 0; i < 3; i++)
2605 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2606 i, 1, 10 + i, 10 + i);
2607 /* "\n". */
2608 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2609 3, 1, 13, 14);
2611 /* Closing quote for nul-terminator. */
2612 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2613 4, 1, 15, 15);
2615 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2618 /* Lex a string literal containing UCN 4 characters.
2619 Verify the substring location data after running cpp_interpret_string
2620 on it. */
2622 static void
2623 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2625 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2626 as UCN 4.
2627 ....................000000000.111111.111122.222222223.33333333344444
2628 ....................123456789.012345.678901.234567890.12345678901234 */
2629 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2630 lexer_test test (case_, content, NULL);
2632 /* Verify that we get the expected token back, with the correct
2633 location information. */
2634 const cpp_token *tok = test.get_token ();
2635 ASSERT_EQ (tok->type, CPP_STRING);
2636 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2638 /* Verify that cpp_interpret_string works.
2639 The string should be encoded in the execution character
2640 set. Assuming that that is UTF-8, we should have the following:
2641 ----------- ---- ----- ------- ----------------
2642 Byte offset Byte Octal Unicode Source Column(s)
2643 ----------- ---- ----- ------- ----------------
2644 0 0x30 '0' 10
2645 1 0x31 '1' 11
2646 2 0x32 '2' 12
2647 3 0x33 '3' 13
2648 4 0x34 '4' 14
2649 5 0xE2 \342 U+2174 15-20
2650 6 0x85 \205 (cont) 15-20
2651 7 0xB4 \264 (cont) 15-20
2652 8 0xE2 \342 U+2175 21-26
2653 9 0x85 \205 (cont) 21-26
2654 10 0xB5 \265 (cont) 21-26
2655 11 0x37 '7' 27
2656 12 0x38 '8' 28
2657 13 0x39 '9' 29
2658 14 0x00 30 (closing quote)
2659 ----------- ---- ----- ------- ---------------. */
2661 cpp_string dst_string;
2662 const enum cpp_ttype type = CPP_STRING;
2663 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2664 &dst_string, type);
2665 ASSERT_TRUE (result);
2666 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2667 (const char *)dst_string.text);
2668 free (const_cast <unsigned char *> (dst_string.text));
2670 /* Verify ranges of individual characters. This no longer includes the
2671 opening quote, but does include the closing quote.
2672 '01234'. */
2673 for (int i = 0; i <= 4; i++)
2674 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2675 /* U+2174. */
2676 for (int i = 5; i <= 7; i++)
2677 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2678 /* U+2175. */
2679 for (int i = 8; i <= 10; i++)
2680 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2681 /* '789' and nul terminator */
2682 for (int i = 11; i <= 14; i++)
2683 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2685 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2688 /* Lex a string literal containing UCN 8 characters.
2689 Verify the substring location data after running cpp_interpret_string
2690 on it. */
2692 static void
2693 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2695 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2696 ....................000000000.111111.1111222222.2222333333333.344444
2697 ....................123456789.012345.6789012345.6789012345678.901234 */
2698 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2699 lexer_test test (case_, content, NULL);
2701 /* Verify that we get the expected token back, with the correct
2702 location information. */
2703 const cpp_token *tok = test.get_token ();
2704 ASSERT_EQ (tok->type, CPP_STRING);
2705 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2706 "\"01234\\U00002174\\U00002175789\"");
2708 /* Verify that cpp_interpret_string works.
2709 The UTF-8 encoding of the string is identical to that from
2710 the ucn4 testcase above; the only difference is the column
2711 locations. */
2712 cpp_string dst_string;
2713 const enum cpp_ttype type = CPP_STRING;
2714 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2715 &dst_string, type);
2716 ASSERT_TRUE (result);
2717 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2718 (const char *)dst_string.text);
2719 free (const_cast <unsigned char *> (dst_string.text));
2721 /* Verify ranges of individual characters. This no longer includes the
2722 opening quote, but does include the closing quote.
2723 '01234'. */
2724 for (int i = 0; i <= 4; i++)
2725 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2726 /* U+2174. */
2727 for (int i = 5; i <= 7; i++)
2728 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2729 /* U+2175. */
2730 for (int i = 8; i <= 10; i++)
2731 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2732 /* '789' at columns 35-37 */
2733 for (int i = 11; i <= 13; i++)
2734 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2735 /* Closing quote/nul-terminator at column 38. */
2736 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2738 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
/* Fetch a big-endian 32-bit value and convert to host endianness.
   PTR_BE_VALUE is read one byte at a time, so the result does not
   depend on the byte order of the host.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *buf = (const unsigned char *)ptr_be_value;
  return (((uint32_t) buf[0] << 24)
          | ((uint32_t) buf[1] << 16)
          | ((uint32_t) buf[2] << 8)
          | (uint32_t) buf[3]);
}
2753 /* Lex a wide string literal and verify that attempts to read substring
2754 location data from it fail gracefully. */
2756 static void
2757 test_lexer_string_locations_wide_string (const line_table_case &case_)
2759 /* Digits 0-9.
2760 ....................000000000.11111111112.22222222233333
2761 ....................123456789.01234567890.12345678901234 */
2762 const char *content = " L\"0123456789\" /* non-str */\n";
2763 lexer_test test (case_, content, NULL);
2765 /* Verify that we get the expected token back, with the correct
2766 location information. */
2767 const cpp_token *tok = test.get_token ();
2768 ASSERT_EQ (tok->type, CPP_WSTRING);
2769 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2771 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2772 cpp_string dst_string;
2773 const enum cpp_ttype type = CPP_WSTRING;
2774 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2775 &dst_string, type);
2776 ASSERT_TRUE (result);
2777 /* The cpp_reader defaults to big-endian with
2778 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2779 now be encoded as UTF-32BE. */
2780 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2781 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2782 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2783 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2784 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2785 free (const_cast <unsigned char *> (dst_string.text));
2787 /* We don't yet support generating substring location information
2788 for L"" strings. */
2789 ASSERT_HAS_NO_SUBSTRING_RANGES
2790 (test, tok->src_loc, type,
2791 "execution character set != source character set");
/* Fetch a big-endian 16-bit value and convert to host endianness.
   PTR_BE_VALUE is read one byte at a time, so the result does not
   depend on the byte order of the host.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *buf = (const unsigned char *)ptr_be_value;
  /* The operands promote to int, so narrow the result explicitly.  */
  return (uint16_t) (((unsigned int) buf[0] << 8) | (unsigned int) buf[1]);
}
2803 /* Lex a u"" string literal and verify that attempts to read substring
2804 location data from it fail gracefully. */
2806 static void
2807 test_lexer_string_locations_string16 (const line_table_case &case_)
2809 /* Digits 0-9.
2810 ....................000000000.11111111112.22222222233333
2811 ....................123456789.01234567890.12345678901234 */
2812 const char *content = " u\"0123456789\" /* non-str */\n";
2813 lexer_test test (case_, content, NULL);
2815 /* Verify that we get the expected token back, with the correct
2816 location information. */
2817 const cpp_token *tok = test.get_token ();
2818 ASSERT_EQ (tok->type, CPP_STRING16);
2819 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2821 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2822 cpp_string dst_string;
2823 const enum cpp_ttype type = CPP_STRING16;
2824 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2825 &dst_string, type);
2826 ASSERT_TRUE (result);
2828 /* The cpp_reader defaults to big-endian, so dst_string should
2829 now be encoded as UTF-16BE. */
2830 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2831 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2832 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2833 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2834 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2835 free (const_cast <unsigned char *> (dst_string.text));
2837 /* We don't yet support generating substring location information
2838 for L"" strings. */
2839 ASSERT_HAS_NO_SUBSTRING_RANGES
2840 (test, tok->src_loc, type,
2841 "execution character set != source character set");
2844 /* Lex a U"" string literal and verify that attempts to read substring
2845 location data from it fail gracefully. */
2847 static void
2848 test_lexer_string_locations_string32 (const line_table_case &case_)
2850 /* Digits 0-9.
2851 ....................000000000.11111111112.22222222233333
2852 ....................123456789.01234567890.12345678901234 */
2853 const char *content = " U\"0123456789\" /* non-str */\n";
2854 lexer_test test (case_, content, NULL);
2856 /* Verify that we get the expected token back, with the correct
2857 location information. */
2858 const cpp_token *tok = test.get_token ();
2859 ASSERT_EQ (tok->type, CPP_STRING32);
2860 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2862 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2863 cpp_string dst_string;
2864 const enum cpp_ttype type = CPP_STRING32;
2865 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2866 &dst_string, type);
2867 ASSERT_TRUE (result);
2869 /* The cpp_reader defaults to big-endian, so dst_string should
2870 now be encoded as UTF-32BE. */
2871 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2872 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2873 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2874 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2875 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2876 free (const_cast <unsigned char *> (dst_string.text));
2878 /* We don't yet support generating substring location information
2879 for L"" strings. */
2880 ASSERT_HAS_NO_SUBSTRING_RANGES
2881 (test, tok->src_loc, type,
2882 "execution character set != source character set");
2885 /* Lex a u8-string literal.
2886 Verify the substring location data after running cpp_interpret_string
2887 on it. */
2889 static void
2890 test_lexer_string_locations_u8 (const line_table_case &case_)
2892 /* Digits 0-9.
2893 ....................000000000.11111111112.22222222233333
2894 ....................123456789.01234567890.12345678901234 */
2895 const char *content = " u8\"0123456789\" /* non-str */\n";
2896 lexer_test test (case_, content, NULL);
2898 /* Verify that we get the expected token back, with the correct
2899 location information. */
2900 const cpp_token *tok = test.get_token ();
2901 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2902 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2904 /* Verify that cpp_interpret_string works. */
2905 cpp_string dst_string;
2906 const enum cpp_ttype type = CPP_STRING;
2907 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2908 &dst_string, type);
2909 ASSERT_TRUE (result);
2910 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2911 free (const_cast <unsigned char *> (dst_string.text));
2913 /* Verify ranges of individual characters. This no longer includes the
2914 opening quote, but does include the closing quote. */
2915 for (int i = 0; i <= 10; i++)
2916 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2919 /* Lex a string literal containing UTF-8 source characters.
2920 Verify the substring location data after running cpp_interpret_string
2921 on it. */
2923 static void
2924 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2926 /* This string literal is written out to the source file as UTF-8,
2927 and is of the form "before mojibake after", where "mojibake"
2928 is written as the following four unicode code points:
2929 U+6587 CJK UNIFIED IDEOGRAPH-6587
2930 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2931 U+5316 CJK UNIFIED IDEOGRAPH-5316
2932 U+3051 HIRAGANA LETTER KE.
2933 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2934 "before" and "after" are 1 byte per unicode character.
2936 The numbering shown are "columns", which are *byte* numbers within
2937 the line, rather than unicode character numbers.
2939 .................... 000000000.1111111.
2940 .................... 123456789.0123456. */
2941 const char *content = (" \"before "
2942 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2943 UTF-8: 0xE6 0x96 0x87
2944 C octal escaped UTF-8: \346\226\207
2945 "column" numbers: 17-19. */
2946 "\346\226\207"
2948 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2949 UTF-8: 0xE5 0xAD 0x97
2950 C octal escaped UTF-8: \345\255\227
2951 "column" numbers: 20-22. */
2952 "\345\255\227"
2954 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2955 UTF-8: 0xE5 0x8C 0x96
2956 C octal escaped UTF-8: \345\214\226
2957 "column" numbers: 23-25. */
2958 "\345\214\226"
2960 /* U+3051 HIRAGANA LETTER KE
2961 UTF-8: 0xE3 0x81 0x91
2962 C octal escaped UTF-8: \343\201\221
2963 "column" numbers: 26-28. */
2964 "\343\201\221"
2966 /* column numbers 29 onwards
2967 2333333.33334444444444
2968 9012345.67890123456789. */
2969 " after\" /* non-str */\n");
2970 lexer_test test (case_, content, NULL);
2972 /* Verify that we get the expected token back, with the correct
2973 location information. */
2974 const cpp_token *tok = test.get_token ();
2975 ASSERT_EQ (tok->type, CPP_STRING);
2976 ASSERT_TOKEN_AS_TEXT_EQ
2977 (test.m_parser, tok,
2978 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2980 /* Verify that cpp_interpret_string works. */
2981 cpp_string dst_string;
2982 const enum cpp_ttype type = CPP_STRING;
2983 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2984 &dst_string, type);
2985 ASSERT_TRUE (result);
2986 ASSERT_STREQ
2987 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2988 (const char *)dst_string.text);
2989 free (const_cast <unsigned char *> (dst_string.text));
2991 /* Verify ranges of individual characters. This no longer includes the
2992 opening quote, but does include the closing quote.
2993 Assuming that both source and execution encodings are UTF-8, we have
2994 a run of 25 octets in each, plus the NUL terminator. */
2995 for (int i = 0; i < 25; i++)
2996 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2997 /* NUL-terminator should use the closing quote at column 35. */
2998 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3000 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3003 /* Test of string literal concatenation. */
3005 static void
3006 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3008 /* Digits 0-9.
3009 .....................000000000.111111.11112222222222
3010 .....................123456789.012345.67890123456789. */
3011 const char *content = (" \"01234\" /* non-str */\n"
3012 " \"56789\" /* non-str */\n");
3013 lexer_test test (case_, content, NULL);
3015 location_t input_locs[2];
3017 /* Verify that we get the expected tokens back. */
3018 auto_vec <cpp_string> input_strings;
3019 const cpp_token *tok_a = test.get_token ();
3020 ASSERT_EQ (tok_a->type, CPP_STRING);
3021 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3022 input_strings.safe_push (tok_a->val.str);
3023 input_locs[0] = tok_a->src_loc;
3025 const cpp_token *tok_b = test.get_token ();
3026 ASSERT_EQ (tok_b->type, CPP_STRING);
3027 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3028 input_strings.safe_push (tok_b->val.str);
3029 input_locs[1] = tok_b->src_loc;
3031 /* Verify that cpp_interpret_string works. */
3032 cpp_string dst_string;
3033 const enum cpp_ttype type = CPP_STRING;
3034 bool result = cpp_interpret_string (test.m_parser,
3035 input_strings.address (), 2,
3036 &dst_string, type);
3037 ASSERT_TRUE (result);
3038 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3039 free (const_cast <unsigned char *> (dst_string.text));
3041 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3042 test.m_concats.record_string_concatenation (2, input_locs);
3044 location_t initial_loc = input_locs[0];
3046 /* "01234" on line 1. */
3047 for (int i = 0; i <= 4; i++)
3048 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3049 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3050 for (int i = 5; i <= 10; i++)
3051 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3053 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3056 /* Another test of string literal concatenation. */
3058 static void
3059 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3061 /* Digits 0-9.
3062 .....................000000000.111.11111112222222
3063 .....................123456789.012.34567890123456. */
3064 const char *content = (" \"01\" /* non-str */\n"
3065 " \"23\" /* non-str */\n"
3066 " \"45\" /* non-str */\n"
3067 " \"67\" /* non-str */\n"
3068 " \"89\" /* non-str */\n");
3069 lexer_test test (case_, content, NULL);
3071 auto_vec <cpp_string> input_strings;
3072 location_t input_locs[5];
3074 /* Verify that we get the expected tokens back. */
3075 for (int i = 0; i < 5; i++)
3077 const cpp_token *tok = test.get_token ();
3078 ASSERT_EQ (tok->type, CPP_STRING);
3079 input_strings.safe_push (tok->val.str);
3080 input_locs[i] = tok->src_loc;
3083 /* Verify that cpp_interpret_string works. */
3084 cpp_string dst_string;
3085 const enum cpp_ttype type = CPP_STRING;
3086 bool result = cpp_interpret_string (test.m_parser,
3087 input_strings.address (), 5,
3088 &dst_string, type);
3089 ASSERT_TRUE (result);
3090 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3091 free (const_cast <unsigned char *> (dst_string.text));
3093 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3094 test.m_concats.record_string_concatenation (5, input_locs);
3096 location_t initial_loc = input_locs[0];
3098 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3099 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3100 and expect get_source_range_for_substring to fail.
3101 However, for a string concatenation test, we can have a case
3102 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3103 but subsequent strings can be after it.
3104 Attempting to detect this within assert_char_at_range
3105 would overcomplicate the logic for the common test cases, so
3106 we detect it here. */
3107 if (should_have_column_data_p (input_locs[0])
3108 && !should_have_column_data_p (input_locs[4]))
3110 /* Verify that get_source_range_for_substring gracefully rejects
3111 this case. */
3112 source_range actual_range;
3113 const char *err
3114 = get_source_range_for_char (test.m_parser, &test.m_concats,
3115 initial_loc, type, 0, &actual_range);
3116 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3117 return;
3120 for (int i = 0; i < 5; i++)
3121 for (int j = 0; j < 2; j++)
3122 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3123 i + 1, 10 + j, 10 + j);
3125 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3126 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3128 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3131 /* Another test of string literal concatenation, this time combined with
3132 various kinds of escaped characters. */
3134 static void
3135 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3137 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3138 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3139 const char *content
3140 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3141 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3142 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3143 lexer_test test (case_, content, NULL);
3145 auto_vec <cpp_string> input_strings;
3146 location_t input_locs[4];
3148 /* Verify that we get the expected tokens back. */
3149 for (int i = 0; i < 4; i++)
3151 const cpp_token *tok = test.get_token ();
3152 ASSERT_EQ (tok->type, CPP_STRING);
3153 input_strings.safe_push (tok->val.str);
3154 input_locs[i] = tok->src_loc;
3157 /* Verify that cpp_interpret_string works. */
3158 cpp_string dst_string;
3159 const enum cpp_ttype type = CPP_STRING;
3160 bool result = cpp_interpret_string (test.m_parser,
3161 input_strings.address (), 4,
3162 &dst_string, type);
3163 ASSERT_TRUE (result);
3164 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3165 free (const_cast <unsigned char *> (dst_string.text));
3167 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3168 test.m_concats.record_string_concatenation (4, input_locs);
3170 location_t initial_loc = input_locs[0];
3172 for (int i = 0; i <= 4; i++)
3173 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3174 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3175 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3176 for (int i = 7; i <= 9; i++)
3177 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3179 /* NUL-terminator should use the location of the final closing quote. */
3180 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3182 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3185 /* Test of string literal in a macro. */
3187 static void
3188 test_lexer_string_locations_macro (const line_table_case &case_)
3190 /* Digits 0-9.
3191 .....................0000000001111111111.22222222223.
3192 .....................1234567890123456789.01234567890. */
3193 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3194 " MACRO");
3195 lexer_test test (case_, content, NULL);
3197 /* Verify that we get the expected tokens back. */
3198 const cpp_token *tok = test.get_token ();
3199 ASSERT_EQ (tok->type, CPP_PADDING);
3201 tok = test.get_token ();
3202 ASSERT_EQ (tok->type, CPP_STRING);
3203 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3205 /* Verify ranges of individual characters. We ought to
3206 see columns within the macro definition. */
3207 for (int i = 0; i <= 10; i++)
3208 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3209 i, 1, 20 + i, 20 + i);
3211 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3213 tok = test.get_token ();
3214 ASSERT_EQ (tok->type, CPP_PADDING);
3217 /* Test of stringification of a macro argument. */
3219 static void
3220 test_lexer_string_locations_stringified_macro_argument
3221 (const line_table_case &case_)
3223 /* .....................000000000111111111122222222223.
3224 .....................123456789012345678901234567890. */
3225 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3226 "MACRO(foo)\n");
3227 lexer_test test (case_, content, NULL);
3229 /* Verify that we get the expected token back. */
3230 const cpp_token *tok = test.get_token ();
3231 ASSERT_EQ (tok->type, CPP_PADDING);
3233 tok = test.get_token ();
3234 ASSERT_EQ (tok->type, CPP_STRING);
3235 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3237 /* We don't support getting the location of a stringified macro
3238 argument. Verify that it fails gracefully. */
3239 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3240 "cpp_interpret_string_1 failed");
3242 tok = test.get_token ();
3243 ASSERT_EQ (tok->type, CPP_PADDING);
3245 tok = test.get_token ();
3246 ASSERT_EQ (tok->type, CPP_PADDING);
3249 /* Ensure that we are fail gracefully if something attempts to pass
3250 in a location that isn't a string literal token. Seen on this code:
3252 const char a[] = " %d ";
3253 __builtin_printf (a, 0.5);
3256 when c-format.c erroneously used the indicated one-character
3257 location as the format string location, leading to a read past the
3258 end of a string buffer in cpp_interpret_string_1. */
3260 static void
3261 test_lexer_string_locations_non_string (const line_table_case &case_)
3263 /* .....................000000000111111111122222222223.
3264 .....................123456789012345678901234567890. */
3265 const char *content = (" a\n");
3266 lexer_test test (case_, content, NULL);
3268 /* Verify that we get the expected token back. */
3269 const cpp_token *tok = test.get_token ();
3270 ASSERT_EQ (tok->type, CPP_NAME);
3271 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3273 /* At this point, libcpp is attempting to interpret the name as a
3274 string literal, despite it not starting with a quote. We don't detect
3275 that, but we should at least fail gracefully. */
3276 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3277 "cpp_interpret_string_1 failed");
3280 /* Ensure that we can read substring information for a token which
3281 starts in one linemap and ends in another . Adapted from
3282 gcc.dg/cpp/pr69985.c. */
3284 static void
3285 test_lexer_string_locations_long_line (const line_table_case &case_)
3287 /* .....................000000.000111111111
3288 .....................123456.789012346789. */
3289 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3290 " \"0123456789012345678901234567890123456789"
3291 "0123456789012345678901234567890123456789"
3292 "0123456789012345678901234567890123456789"
3293 "0123456789\"\n");
3295 lexer_test test (case_, content, NULL);
3297 /* Verify that we get the expected token back. */
3298 const cpp_token *tok = test.get_token ();
3299 ASSERT_EQ (tok->type, CPP_STRING);
3301 if (!should_have_column_data_p (line_table->highest_location))
3302 return;
3304 /* Verify ranges of individual characters. */
3305 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3306 for (int i = 0; i < 131; i++)
3307 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3308 i, 2, 7 + i, 7 + i);
3311 /* Test of locations within a raw string that doesn't contain a newline. */
3313 static void
3314 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3316 /* .....................00.0000000111111111122.
3317 .....................12.3456789012345678901. */
3318 const char *content = ("R\"foo(0123456789)foo\"\n");
3319 lexer_test test (case_, content, NULL);
3321 /* Verify that we get the expected token back. */
3322 const cpp_token *tok = test.get_token ();
3323 ASSERT_EQ (tok->type, CPP_STRING);
3325 /* Verify that cpp_interpret_string works. */
3326 cpp_string dst_string;
3327 const enum cpp_ttype type = CPP_STRING;
3328 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3329 &dst_string, type);
3330 ASSERT_TRUE (result);
3331 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3332 free (const_cast <unsigned char *> (dst_string.text));
3334 if (!should_have_column_data_p (line_table->highest_location))
3335 return;
3337 /* 0-9, plus the nil terminator. */
3338 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3339 for (int i = 0; i < 11; i++)
3340 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3341 i, 1, 7 + i, 7 + i);
3344 /* Test of locations within a raw string that contains a newline. */
3346 static void
3347 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3349 /* .....................00.0000.
3350 .....................12.3456. */
3351 const char *content = ("R\"foo(\n"
3352 /* .....................00000.
3353 .....................12345. */
3354 "hello\n"
3355 "world\n"
3356 /* .....................00000.
3357 .....................12345. */
3358 ")foo\"\n");
3359 lexer_test test (case_, content, NULL);
3361 /* Verify that we get the expected token back. */
3362 const cpp_token *tok = test.get_token ();
3363 ASSERT_EQ (tok->type, CPP_STRING);
3365 /* Verify that cpp_interpret_string works. */
3366 cpp_string dst_string;
3367 const enum cpp_ttype type = CPP_STRING;
3368 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3369 &dst_string, type);
3370 ASSERT_TRUE (result);
3371 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3372 free (const_cast <unsigned char *> (dst_string.text));
3374 if (!should_have_column_data_p (line_table->highest_location))
3375 return;
3377 /* Currently we don't support locations within raw strings that
3378 contain newlines. */
3379 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3380 "range endpoints are on different lines");
3383 /* Test of parsing an unterminated raw string. */
3385 static void
3386 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3388 const char *content = "R\"ouch()ouCh\" /* etc */";
3390 lexer_error_sink errors;
3391 lexer_test test (case_, content, &errors);
3392 test.m_implicitly_expect_EOF = false;
3394 /* Attempt to parse the raw string. */
3395 const cpp_token *tok = test.get_token ();
3396 ASSERT_EQ (tok->type, CPP_EOF);
3398 ASSERT_EQ (1, errors.m_errors.length ());
3399 /* We expect the message "unterminated raw string"
3400 in the "cpplib" translation domain.
3401 It's not clear that dgettext is available on all supported hosts,
3402 so this assertion is commented-out for now.
3403 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3404 errors.m_errors[0]);
3408 /* Test of lexing char constants. */
3410 static void
3411 test_lexer_char_constants (const line_table_case &case_)
3413 /* Various char constants.
3414 .....................0000000001111111111.22222222223.
3415 .....................1234567890123456789.01234567890. */
3416 const char *content = (" 'a'\n"
3417 " u'a'\n"
3418 " U'a'\n"
3419 " L'a'\n"
3420 " 'abc'\n");
3421 lexer_test test (case_, content, NULL);
3423 /* Verify that we get the expected tokens back. */
3424 /* 'a'. */
3425 const cpp_token *tok = test.get_token ();
3426 ASSERT_EQ (tok->type, CPP_CHAR);
3427 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3429 unsigned int chars_seen;
3430 int unsignedp;
3431 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3432 &chars_seen, &unsignedp);
3433 ASSERT_EQ (cc, 'a');
3434 ASSERT_EQ (chars_seen, 1);
3436 /* u'a'. */
3437 tok = test.get_token ();
3438 ASSERT_EQ (tok->type, CPP_CHAR16);
3439 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3441 /* U'a'. */
3442 tok = test.get_token ();
3443 ASSERT_EQ (tok->type, CPP_CHAR32);
3444 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3446 /* L'a'. */
3447 tok = test.get_token ();
3448 ASSERT_EQ (tok->type, CPP_WCHAR);
3449 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3451 /* 'abc' (c-char-sequence). */
3452 tok = test.get_token ();
3453 ASSERT_EQ (tok->type, CPP_CHAR);
3454 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3456 /* A table of interesting location_t values, giving one axis of our test
3457 matrix. */
3459 static const location_t boundary_locations[] = {
3460 /* Zero means "don't override the default values for a new line_table". */
3463 /* An arbitrary non-zero value that isn't close to one of
3464 the boundary values below. */
3465 0x10000,
3467 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3468 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3469 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3470 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3471 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3472 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3474 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3475 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3476 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3477 LINE_MAP_MAX_LOCATION_WITH_COLS,
3478 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3479 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3482 /* Run TESTCASE multiple times, once for each case in our test matrix. */
3484 void
3485 for_each_line_table_case (void (*testcase) (const line_table_case &))
3487 /* As noted above in the description of struct line_table_case,
3488 we want to explore a test matrix of interesting line_table
3489 situations, running various selftests for each case within the
3490 matrix. */
3492 /* Run all tests with:
3493 (a) line_table->default_range_bits == 0, and
3494 (b) line_table->default_range_bits == 5. */
3495 int num_cases_tested = 0;
3496 for (int default_range_bits = 0; default_range_bits <= 5;
3497 default_range_bits += 5)
3499 /* ...and use each of the "interesting" location values as
3500 the starting location within line_table. */
3501 const int num_boundary_locations
3502 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3503 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3505 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3507 testcase (c);
3509 num_cases_tested++;
3513 /* Verify that we fully covered the test matrix. */
3514 ASSERT_EQ (num_cases_tested, 2 * 12);
3517 /* Run all of the selftests within this file. */
3519 void
3520 input_c_tests ()
3522 test_should_have_column_data_p ();
3523 test_unknown_location ();
3524 test_builtins ();
3525 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3527 for_each_line_table_case (test_accessing_ordinary_linemaps);
3528 for_each_line_table_case (test_lexer);
3529 for_each_line_table_case (test_lexer_string_locations_simple);
3530 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3531 for_each_line_table_case (test_lexer_string_locations_hex);
3532 for_each_line_table_case (test_lexer_string_locations_oct);
3533 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3534 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3535 for_each_line_table_case (test_lexer_string_locations_ucn4);
3536 for_each_line_table_case (test_lexer_string_locations_ucn8);
3537 for_each_line_table_case (test_lexer_string_locations_wide_string);
3538 for_each_line_table_case (test_lexer_string_locations_string16);
3539 for_each_line_table_case (test_lexer_string_locations_string32);
3540 for_each_line_table_case (test_lexer_string_locations_u8);
3541 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3542 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3543 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3544 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3545 for_each_line_table_case (test_lexer_string_locations_macro);
3546 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3547 for_each_line_table_case (test_lexer_string_locations_non_string);
3548 for_each_line_table_case (test_lexer_string_locations_long_line);
3549 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3550 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3551 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3552 for_each_line_table_case (test_lexer_char_constants);
3554 test_reading_source_line ();
3557 } // namespace selftest
3559 #endif /* CHECKING_P */