1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
24 #include "diagnostic-core.h"
32 /* This is a cache used by get_next_line to store the content of a
33 file to be searched for file lines. */
36 /* This is the information used to store a line boundary. */
39 /* The line number. It starts from 1. */
42 /* The position (byte count) of the beginning of the line,
43 relative to the file data pointer. This starts at zero. */
46 /* The position (byte count) of the last byte of the line. This
47 normally points to the '\n' character, or to one byte after the
48 last byte of the file, if the file doesn't contain a '\n'
52 line_info (size_t l
, size_t s
, size_t e
)
53 : line_num (l
), start_pos (s
), end_pos (e
)
57 :line_num (0), start_pos (0), end_pos (0)
61 /* The number of times this file has been accessed. This is used
62 to designate which file cache to evict from the cache
66 const char *file_path
;
70 /* This points to the content of the file that we've read so
74 /* The size of the DATA array above.*/
77 /* The number of bytes read from the underlying file so far. This
78 must be less than (or equal to) SIZE above. */
81 /* The index of the beginning of the current line. */
82 size_t line_start_idx
;
84 /* The number of the previous line read. This starts at 1. Zero
85 means we've read no line so far. */
88 /* This is the total number of lines of the current file. At the
89 moment, we try to get this information from the line map
90 subsystem. Note that this is just a hint. When using the C++
91 front-end, this hint is correct because the input file is then
92 completely tokenized before parsing starts; so the line map knows
93 the number of lines before compilation really starts. For e.g,
94 the C front-end, it can happen that we start emitting diagnostics
95 before the line map has seen the end of the file. */
98 /* This is a record of the beginning and end of the lines we've seen
99 while reading the file. This is useful to avoid walking the data
100 from the beginning when we are asked to read a line that is
101 before LINE_START_IDX above. Note that the maximum size of this
102 record is fcache_line_record_size, so that the memory consumption
103 doesn't explode. We thus scale total_lines down to
104 fcache_line_record_size. */
105 vec
<line_info
, va_heap
> line_record
;
111 /* Current position in real source file. */
113 location_t input_location
= UNKNOWN_LOCATION
;
115 struct line_maps
*line_table
;
117 static fcache
*fcache_tab
;
118 static const size_t fcache_tab_size
= 16;
119 static const size_t fcache_buffer_size
= 4 * 1024;
120 static const size_t fcache_line_record_size
= 100;
122 /* Expand the source location LOC into a human readable location. If
123 LOC resolves to a builtin location, the file name of the readable
124 location is set to the string "<built-in>". If EXPANSION_POINT_P is
125 TRUE and LOC is virtual, then it is resolved to the expansion
126 point of the involved macro. Otherwise, it is resolved to the
127 spelling location of the token.
129 When resolving to the spelling location of the token, if the
130 resulting location is for a built-in location (that is, it has no
131 associated line/column) in the context of a macro expansion, the
132 returned location is the first one (while unwinding the macro
133 location towards its expansion point) that is in real source
136 static expanded_location
137 expand_location_1 (source_location loc
,
138 bool expansion_point_p
)
140 expanded_location xloc
;
141 const line_map_ordinary
*map
;
142 enum location_resolution_kind lrk
= LRK_MACRO_EXPANSION_POINT
;
145 if (IS_ADHOC_LOC (loc
))
147 block
= LOCATION_BLOCK (loc
);
148 loc
= LOCATION_LOCUS (loc
);
151 memset (&xloc
, 0, sizeof (xloc
));
153 if (loc
>= RESERVED_LOCATION_COUNT
)
155 if (!expansion_point_p
)
157 /* We want to resolve LOC to its spelling location.
159 But if that spelling location is a reserved location that
160 appears in the context of a macro expansion (like for a
161 location for a built-in token), let's consider the first
162 location (toward the expansion point) that is not reserved;
163 that is, the first location that is in real source code. */
164 loc
= linemap_unwind_to_first_non_reserved_loc (line_table
,
166 lrk
= LRK_SPELLING_LOCATION
;
168 loc
= linemap_resolve_location (line_table
, loc
,
170 xloc
= linemap_expand_location (line_table
, map
, loc
);
174 if (loc
<= BUILTINS_LOCATION
)
175 xloc
.file
= loc
== UNKNOWN_LOCATION
? NULL
: _("<built-in>");
180 /* Initialize the set of cache used for files accessed by caret
184 diagnostic_file_cache_init (void)
186 if (fcache_tab
== NULL
)
187 fcache_tab
= new fcache
[fcache_tab_size
];
190 /* Free the resources used by the set of cache used for files accessed
191 by caret diagnostic. */
194 diagnostic_file_cache_fini (void)
198 delete [] (fcache_tab
);
203 /* Return the total number of lines that have been read so far by the
204 line map (in the preprocessor). For languages like C++ that
205 entirely preprocess the input file before starting to parse, this
206 equals the actual number of lines of the file. */
209 total_lines_num (const char *file_path
)
212 source_location l
= 0;
213 if (linemap_get_file_highest_location (line_table
, file_path
, &l
))
215 gcc_assert (l
>= RESERVED_LOCATION_COUNT
);
216 expanded_location xloc
= expand_location (l
);
222 /* Lookup the cache used for the content of a given file accessed by
223 caret diagnostic. Return the found cached file, or NULL if no
224 cached file was found. */
227 lookup_file_in_cache_tab (const char *file_path
)
229 if (file_path
== NULL
)
232 diagnostic_file_cache_init ();
234 /* This will contain the found cached file. */
236 for (unsigned i
= 0; i
< fcache_tab_size
; ++i
)
238 fcache
*c
= &fcache_tab
[i
];
239 if (c
->file_path
&& !strcmp (c
->file_path
, file_path
))
252 /* Return the file cache that has been the least recently used, or the
253 first empty one. If HIGHEST_USE_COUNT is non-null,
254 *HIGHEST_USE_COUNT is set to the highest use count of the entries
255 in the cache table. */
258 evicted_cache_tab_entry (unsigned *highest_use_count
)
260 diagnostic_file_cache_init ();
262 fcache
*to_evict
= &fcache_tab
[0];
263 unsigned huc
= to_evict
->use_count
;
264 for (unsigned i
= 1; i
< fcache_tab_size
; ++i
)
266 fcache
*c
= &fcache_tab
[i
];
267 bool c_is_empty
= (c
->file_path
== NULL
);
269 if (c
->use_count
< to_evict
->use_count
270 || (to_evict
->file_path
&& c_is_empty
))
271 /* We evict C because it's either an entry with a lower use
272 count or one that is empty. */
275 if (huc
< c
->use_count
)
279 /* We've reached the end of the cache; subsequent elements are
284 if (highest_use_count
)
285 *highest_use_count
= huc
;
290 /* Create the cache used for the content of a given file to be
291 accessed by caret diagnostic. This cache is added to an array of
292 cache and can be retrieved by lookup_file_in_cache_tab. This
293 function returns the created cache. Note that only the last
294 fcache_tab_size files are cached. */
297 add_file_to_cache_tab (const char *file_path
)
300 FILE *fp
= fopen (file_path
, "r");
304 unsigned highest_use_count
= 0;
305 fcache
*r
= evicted_cache_tab_entry (&highest_use_count
);
306 r
->file_path
= file_path
;
311 r
->line_start_idx
= 0;
313 r
->line_record
.truncate (0);
314 /* Ensure that this cache entry doesn't get evicted next time
315 add_file_to_cache_tab is called. */
316 r
->use_count
= ++highest_use_count
;
317 r
->total_lines
= total_lines_num (file_path
);
322 /* Lookup the cache used for the content of a given file accessed by
323 caret diagnostic. If no cached file was found, create a new cache
324 for this file, add it to the array of cached file and return
328 lookup_or_add_file_to_cache_tab (const char *file_path
)
330 fcache
*r
= lookup_file_in_cache_tab (file_path
);
332 r
= add_file_to_cache_tab (file_path
);
336 /* Default constructor for a cache of file used by caret
340 : use_count (0), file_path (NULL
), fp (NULL
), data (0),
341 size (0), nb_read (0), line_start_idx (0), line_num (0),
344 line_record
.create (0);
347 /* Destructor for a cache of file used by caret diagnostic. */
361 line_record
.release ();
364 /* Returns TRUE iff the cache would need to be filled with data coming
365 from the file. That is, either the cache is empty or full or the
366 current line is empty. Note that if the cache is full, it would
367 need to be extended and filled again. */
370 needs_read (fcache
*c
)
372 return (c
->nb_read
== 0
373 || c
->nb_read
== c
->size
374 || (c
->line_start_idx
>= c
->nb_read
- 1));
377 /* Return TRUE iff the cache is full and thus needs to be
381 needs_grow (fcache
*c
)
383 return c
->nb_read
== c
->size
;
386 /* Grow the cache if it needs to be extended. */
389 maybe_grow (fcache
*c
)
394 size_t size
= c
->size
== 0 ? fcache_buffer_size
: c
->size
* 2;
395 c
->data
= XRESIZEVEC (char, c
->data
, size
+ 1);
399 /* Read more data into the cache. Extends the cache if need be.
400 Returns TRUE iff new data could be read. */
403 read_data (fcache
*c
)
405 if (feof (c
->fp
) || ferror (c
->fp
))
410 char * from
= c
->data
+ c
->nb_read
;
411 size_t to_read
= c
->size
- c
->nb_read
;
412 size_t nb_read
= fread (from
, 1, to_read
, c
->fp
);
417 c
->nb_read
+= nb_read
;
421 /* Read new data iff the cache needs to be filled with more data
422 coming from the file FP. Return TRUE iff the cache was filled with
426 maybe_read_data (fcache
*c
)
430 return read_data (c
);
433 /* Read a new line from file FP, using C as a cache for the data
434 coming from the file. Upon successful completion, *LINE is set to
435 the beginning of the line found. Space for that line has been
436 allocated in the cache thus *LINE has the same life time as C.
437 *LINE_LEN is set to the length of the line. Note that the line
438 does not contain any terminal delimiter. This function returns
439 true if some data was read or processed from the cache, false
440 otherwise. Note that subsequent calls to get_next_line return the
441 next lines of the file and might overwrite the content of
445 get_next_line (fcache
*c
, char **line
, ssize_t
*line_len
)
447 /* Fill the cache with data to process. */
450 size_t remaining_size
= c
->nb_read
- c
->line_start_idx
;
451 if (remaining_size
== 0)
452 /* There is no more data to process. */
455 char *line_start
= c
->data
+ c
->line_start_idx
;
457 char *next_line_start
= NULL
;
459 char *line_end
= (char *) memchr (line_start
, '\n', remaining_size
);
460 if (line_end
== NULL
)
462 /* We haven't found the end-of-line delimiter in the cache.
463 Fill the cache with more data from the file and look for the
465 while (maybe_read_data (c
))
467 line_start
= c
->data
+ c
->line_start_idx
;
468 remaining_size
= c
->nb_read
- c
->line_start_idx
;
469 line_end
= (char *) memchr (line_start
, '\n', remaining_size
);
470 if (line_end
!= NULL
)
472 next_line_start
= line_end
+ 1;
476 if (line_end
== NULL
)
477 /* We've loaded all the file into the cache and still no
478 '\n'. Let's say the line ends up at one byte past the
479 end of the file. This is to stay consistent with the case
480 of when the line ends up with a '\n' and line_end points to
481 that terminal '\n'. That consistency is useful below in
482 the len calculation. */
483 line_end
= c
->data
+ c
->nb_read
;
486 next_line_start
= line_end
+ 1;
491 /* At this point, we've found the end of the line. It either
492 points to the '\n' or to one byte after the last byte of the
494 gcc_assert (line_end
!= NULL
);
496 len
= line_end
- line_start
;
498 if (c
->line_start_idx
< c
->nb_read
)
503 /* Before we update our line record, make sure the hint about the
504 total number of lines of the file is correct. If it's not, then
505 we give up recording line boundaries from now on. */
506 bool update_line_record
= true;
507 if (c
->line_num
> c
->total_lines
)
508 update_line_record
= false;
510 /* Now update our line record so that re-reading lines from
511 before c->line_start_idx is faster. */
512 if (update_line_record
513 && c
->line_record
.length () < fcache_line_record_size
)
515 /* If the file lines fits in the line record, we just record all
517 if (c
->total_lines
<= fcache_line_record_size
518 && c
->line_num
> c
->line_record
.length ())
519 c
->line_record
.safe_push (fcache::line_info (c
->line_num
,
521 line_end
- c
->data
));
522 else if (c
->total_lines
> fcache_line_record_size
)
524 /* ... otherwise, we just scale total_lines down to
525 fcache_line_record_size lines. */
526 size_t n
= (c
->line_num
* fcache_line_record_size
) / c
->total_lines
;
527 if (c
->line_record
.length () == 0
528 || n
>= c
->line_record
.length ())
529 c
->line_record
.safe_push (fcache::line_info (c
->line_num
,
531 line_end
- c
->data
));
535 /* Update c->line_start_idx so that it points to the next line to be
538 c
->line_start_idx
= next_line_start
- c
->data
;
540 /* We didn't find any terminal '\n'. Let's consider that the end
541 of line is the end of the data in the cache. The next
542 invocation of get_next_line will either read more data from the
543 underlying file or return false early because we've reached the
545 c
->line_start_idx
= c
->nb_read
;
552 /* Reads the next line from FILE into *LINE. If *LINE is too small
553 (or NULL) it is allocated (or extended) to have enough space to
554 contain the line. *LINE_LEN must contain the size of the
555 initial *LINE buffer. It's then updated by this function to the
556 actual length of the returned line. Note that the returned line
557 can contain several zero bytes. Also note that the returned string
558 is allocated in static storage that is going to be re-used by
559 subsequent invocations of read_line. */
562 read_next_line (fcache
*cache
, char ** line
, ssize_t
*line_len
)
567 if (!get_next_line (cache
, &l
, &len
))
571 *line
= XNEWVEC (char, len
);
574 *line
= XRESIZEVEC (char, *line
, len
);
576 memcpy (*line
, l
, len
);
582 /* Consume the next bytes coming from the cache (or from its
583 underlying file if there are remaining unread bytes in the file)
584 until we reach the next end-of-line (or end-of-file). There is no
585 copying from the cache involved. Return TRUE upon successful
589 goto_next_line (fcache
*cache
)
594 return get_next_line (cache
, &l
, &len
);
597 /* Read an arbitrary line number LINE_NUM from the file cached in C.
598 The line is copied into *LINE. *LINE_LEN must have been set to the
599 length of *LINE. If *LINE is too small (or NULL) it's extended (or
600 allocated) and *LINE_LEN is adjusted accordingly. *LINE ends up
601 with a terminal zero byte and can contain additional zero bytes.
602 This function returns true if a line was read. */
605 read_line_num (fcache
*c
, size_t line_num
,
606 char ** line
, ssize_t
*line_len
)
608 gcc_assert (line_num
> 0);
610 if (line_num
<= c
->line_num
)
612 /* We've been asked to read lines that are before c->line_num.
613 So let's use our line record (if it's not empty) to try to
614 avoid re-reading the file from the beginning again. */
616 if (c
->line_record
.is_empty ())
618 c
->line_start_idx
= 0;
623 fcache::line_info
*i
= NULL
;
624 if (c
->total_lines
<= fcache_line_record_size
)
626 /* In languages where the input file is not totally
627 preprocessed up front, the c->total_lines hint
628 can be smaller than the number of lines of the
629 file. In that case, only the first
630 c->total_lines have been recorded.
632 Otherwise, the first c->total_lines we've read have
633 their start/end recorded here. */
634 i
= (line_num
<= c
->total_lines
)
635 ? &c
->line_record
[line_num
- 1]
636 : &c
->line_record
[c
->total_lines
- 1];
637 gcc_assert (i
->line_num
<= line_num
);
641 /* So the file had more lines than our line record
642 size. Thus the number of lines we've recorded has
643 been scaled down to fcache_line_record_size. Let's
644 pick the start/end of the recorded line that is
645 closest to line_num. */
646 size_t n
= (line_num
<= c
->total_lines
)
647 ? line_num
* fcache_line_record_size
/ c
->total_lines
648 : c
->line_record
.length () - 1;
649 if (n
< c
->line_record
.length ())
651 i
= &c
->line_record
[n
];
652 gcc_assert (i
->line_num
<= line_num
);
656 if (i
&& i
->line_num
== line_num
)
658 /* We have the start/end of the line. Let's just copy
659 it again and we are done. */
660 ssize_t len
= i
->end_pos
- i
->start_pos
+ 1;
662 *line
= XRESIZEVEC (char, *line
, len
);
663 memmove (*line
, c
->data
+ i
->start_pos
, len
);
664 (*line
)[len
- 1] = '\0';
671 c
->line_start_idx
= i
->start_pos
;
672 c
->line_num
= i
->line_num
- 1;
676 c
->line_start_idx
= 0;
682 /* Let's walk from line c->line_num up to line_num - 1, without
684 while (c
->line_num
< line_num
- 1)
685 if (!goto_next_line (c
))
688 /* The line we want is the next one. Let's read and copy it back to
690 return read_next_line (c
, line
, line_len
);
693 /* Return the physical source line that corresponds to FILE_PATH/LINE in a
694 buffer that is statically allocated. The newline is replaced by
695 the null character. Note that the line can contain several null
696 characters, so LINE_LEN, if non-null, points to the actual length
700 location_get_source_line (const char *file_path
, int line
,
709 fcache
*c
= lookup_or_add_file_to_cache_tab (file_path
);
713 bool read
= read_line_num (c
, line
, &buffer
, &len
);
715 if (read
&& line_len
)
718 return read
? buffer
: NULL
;
721 /* Test if the location originates from the spelling location of a
722 builtin-tokens. That is, return TRUE if LOC is a (possibly
723 virtual) location of a built-in token that appears in the expansion
724 list of a macro. Please note that this function also works on
725 tokens that result from built-in tokens. For instance, the
726 function would return true if passed a token "4" that is the result
727 of the expansion of the built-in __LINE__ macro. */
729 is_location_from_builtin_token (source_location loc
)
731 const line_map_ordinary
*map
= NULL
;
732 loc
= linemap_resolve_location (line_table
, loc
,
733 LRK_SPELLING_LOCATION
, &map
);
734 return loc
== BUILTINS_LOCATION
;
737 /* Expand the source location LOC into a human readable location. If
738 LOC is virtual, it resolves to the expansion point of the involved
739 macro. If LOC resolves to a builtin location, the file name of the
740 readable location is set to the string "<built-in>". */
743 expand_location (source_location loc
)
745 return expand_location_1 (loc
, /*expansion_point_p=*/true);
748 /* Expand the source location LOC into a human readable location. If
749 LOC is virtual, it resolves to the expansion location of the
750 relevant macro. If LOC resolves to a builtin location, the file
751 name of the readable location is set to the string
755 expand_location_to_spelling_point (source_location loc
)
757 return expand_location_1 (loc
, /*expansion_point_p=*/false);
760 /* The rich_location class within libcpp requires a way to expand
761 source_location instances, and relies on the client code
762 providing a symbol named
763 linemap_client_expand_location_to_spelling_point
766 This is the implementation for libcommon.a (all host binaries),
767 which simply calls into expand_location_to_spelling_point. */
770 linemap_client_expand_location_to_spelling_point (source_location loc
)
772 return expand_location_to_spelling_point (loc
);
776 /* If LOCATION is in a system header and if it is a virtual location for
777 a token coming from the expansion of a macro, unwind it to the
778 location of the expansion point of the macro. Otherwise, just return
781 This is used for instance when we want to emit diagnostics about a
782 token that may be located in a macro that is itself defined in a
783 system header, for example, for the NULL macro. In such a case, if
784 LOCATION were passed directly to diagnostic functions such as
785 warning_at, the diagnostic would be suppressed (unless
786 -Wsystem-headers). */
789 expansion_point_location_if_in_system_header (source_location location
)
791 if (in_system_header_at (location
))
792 location
= linemap_resolve_location (line_table
, location
,
793 LRK_MACRO_EXPANSION_POINT
,
798 /* If LOCATION is a virtual location for a token coming from the expansion
799 of a macro, unwind to the location of the expansion point of the macro. */
802 expansion_point_location (source_location location
)
804 return linemap_resolve_location (line_table
, location
,
805 LRK_MACRO_EXPANSION_POINT
, NULL
);
808 /* Given location LOC, strip away any packed range information
809 or ad-hoc information. */
812 get_pure_location (location_t loc
)
814 if (IS_ADHOC_LOC (loc
))
816 = line_table
->location_adhoc_data_map
.data
[loc
& MAX_SOURCE_LOCATION
].locus
;
818 if (loc
>= LINEMAPS_MACRO_LOWEST_LOCATION (line_table
))
821 if (loc
< RESERVED_LOCATION_COUNT
)
824 const line_map
*map
= linemap_lookup (line_table
, loc
);
825 const line_map_ordinary
*ordmap
= linemap_check_ordinary (map
);
827 return loc
& ~((1 << ordmap
->m_range_bits
) - 1);
830 /* Construct a location with caret at CARET, ranging from START to
836 523 return foo + bar;
840 The location's caret is at the "+", line 523 column 15, but starts
841 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
842 of "bar" at column 19. */
845 make_location (location_t caret
, location_t start
, location_t finish
)
847 location_t pure_loc
= get_pure_location (caret
);
848 source_range src_range
;
849 src_range
.m_start
= start
;
850 src_range
.m_finish
= finish
;
851 location_t combined_loc
= COMBINE_LOCATION_DATA (line_table
,
859 #define ONE_M (ONE_K * ONE_K)
861 /* Display a number as an integer multiple of either:
862 - 1024, if said integer is >= 10 K (in base 2)
863 - 1024 * 1024, if said integer is >= 10 M (in base 2)
865 #define SCALE(x) ((unsigned long) ((x) < 10 * ONE_K \
867 : ((x) < 10 * ONE_M \
/* For a given integer, yield:
   - the character ' ' if the number is strictly lower than 10 K
     (in base 2)
   - the character 'k' if the number is at least 10 K but strictly
     lower than 10 M (in base 2)
   - the character 'M' if the number is at least 10 M (in base 2).  */
#define STAT_LABEL(x) \
  ((x) >= 10 * ONE_M ? 'M' : ((x) >= 10 * ONE_K ? 'k' : ' '))

/* Expand to two comma-separated arguments describing SIZE as a
   multiple of 1K or 1M (in base 2): the scaled amount, followed by the
   matching unit character (k, M, or ' ').  */
#define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
883 /* Dump statistics to stderr about the memory usage of the line_table
884 set of line maps. This also displays some statistics about macro
888 dump_line_table_statistics (void)
890 struct linemap_stats s
;
891 long total_used_map_size
,
893 total_allocated_map_size
;
895 memset (&s
, 0, sizeof (s
));
897 linemap_get_statistics (line_table
, &s
);
899 macro_maps_size
= s
.macro_maps_used_size
900 + s
.macro_maps_locations_size
;
902 total_allocated_map_size
= s
.ordinary_maps_allocated_size
903 + s
.macro_maps_allocated_size
904 + s
.macro_maps_locations_size
;
906 total_used_map_size
= s
.ordinary_maps_used_size
907 + s
.macro_maps_used_size
908 + s
.macro_maps_locations_size
;
910 fprintf (stderr
, "Number of expanded macros: %5ld\n",
911 s
.num_expanded_macros
);
912 if (s
.num_expanded_macros
!= 0)
913 fprintf (stderr
, "Average number of tokens per macro expansion: %5ld\n",
914 s
.num_macro_tokens
/ s
.num_expanded_macros
);
916 "\nLine Table allocations during the "
917 "compilation process\n");
918 fprintf (stderr
, "Number of ordinary maps used: %5ld%c\n",
919 SCALE (s
.num_ordinary_maps_used
),
920 STAT_LABEL (s
.num_ordinary_maps_used
));
921 fprintf (stderr
, "Ordinary map used size: %5ld%c\n",
922 SCALE (s
.ordinary_maps_used_size
),
923 STAT_LABEL (s
.ordinary_maps_used_size
));
924 fprintf (stderr
, "Number of ordinary maps allocated: %5ld%c\n",
925 SCALE (s
.num_ordinary_maps_allocated
),
926 STAT_LABEL (s
.num_ordinary_maps_allocated
));
927 fprintf (stderr
, "Ordinary maps allocated size: %5ld%c\n",
928 SCALE (s
.ordinary_maps_allocated_size
),
929 STAT_LABEL (s
.ordinary_maps_allocated_size
));
930 fprintf (stderr
, "Number of macro maps used: %5ld%c\n",
931 SCALE (s
.num_macro_maps_used
),
932 STAT_LABEL (s
.num_macro_maps_used
));
933 fprintf (stderr
, "Macro maps used size: %5ld%c\n",
934 SCALE (s
.macro_maps_used_size
),
935 STAT_LABEL (s
.macro_maps_used_size
));
936 fprintf (stderr
, "Macro maps locations size: %5ld%c\n",
937 SCALE (s
.macro_maps_locations_size
),
938 STAT_LABEL (s
.macro_maps_locations_size
));
939 fprintf (stderr
, "Macro maps size: %5ld%c\n",
940 SCALE (macro_maps_size
),
941 STAT_LABEL (macro_maps_size
));
942 fprintf (stderr
, "Duplicated maps locations size: %5ld%c\n",
943 SCALE (s
.duplicated_macro_maps_locations_size
),
944 STAT_LABEL (s
.duplicated_macro_maps_locations_size
));
945 fprintf (stderr
, "Total allocated maps size: %5ld%c\n",
946 SCALE (total_allocated_map_size
),
947 STAT_LABEL (total_allocated_map_size
));
948 fprintf (stderr
, "Total used maps size: %5ld%c\n",
949 SCALE (total_used_map_size
),
950 STAT_LABEL (total_used_map_size
));
951 fprintf (stderr
, "Ad-hoc table size: %5ld%c\n",
952 SCALE (s
.adhoc_table_size
),
953 STAT_LABEL (s
.adhoc_table_size
));
954 fprintf (stderr
, "Ad-hoc table entries used: %5ld\n",
955 s
.adhoc_table_entries_used
);
956 fprintf (stderr
, "optimized_ranges: %i\n",
957 line_table
->num_optimized_ranges
);
958 fprintf (stderr
, "unoptimized_ranges: %i\n",
959 line_table
->num_unoptimized_ranges
);
961 fprintf (stderr
, "\n");
964 /* Get location one beyond the final location in ordinary map IDX. */
966 static source_location
967 get_end_location (struct line_maps
*set
, unsigned int idx
)
969 if (idx
== LINEMAPS_ORDINARY_USED (set
) - 1)
970 return set
->highest_location
;
972 struct line_map
*next_map
= LINEMAPS_ORDINARY_MAP_AT (set
, idx
+ 1);
973 return MAP_START_LOCATION (next_map
);
976 /* Helper function for write_digit_row. */
979 write_digit (FILE *stream
, int digit
)
981 fputc ('0' + (digit
% 10), stream
);
984 /* Helper function for dump_location_info.
985 Write a row of numbers to STREAM, numbering a source line,
986 giving the units, tens, hundreds etc of the column number. */
989 write_digit_row (FILE *stream
, int indent
,
990 const line_map_ordinary
*map
,
991 source_location loc
, int max_col
, int divisor
)
993 fprintf (stream
, "%*c", indent
, ' ');
994 fprintf (stream
, "|");
995 for (int column
= 1; column
< max_col
; column
++)
997 source_location column_loc
= loc
+ (column
<< map
->m_range_bits
);
998 write_digit (stream
, column_loc
/ divisor
);
1000 fprintf (stream
, "\n");
1003 /* Write a half-closed (START) / half-open (END) interval of
1004 source_location to STREAM. */
1007 dump_location_range (FILE *stream
,
1008 source_location start
, source_location end
)
1011 " source_location interval: %u <= loc < %u\n",
1015 /* Write a labelled description of a half-closed (START) / half-open (END)
1016 interval of source_location to STREAM. */
1019 dump_labelled_location_range (FILE *stream
,
1021 source_location start
, source_location end
)
1023 fprintf (stream
, "%s\n", name
);
1024 dump_location_range (stream
, start
, end
);
1025 fprintf (stream
, "\n");
1028 /* Write a visualization of the locations in the line_table to STREAM. */
1031 dump_location_info (FILE *stream
)
1033 /* Visualize the reserved locations. */
1034 dump_labelled_location_range (stream
, "RESERVED LOCATIONS",
1035 0, RESERVED_LOCATION_COUNT
);
1037 /* Visualize the ordinary line_map instances, rendering the sources. */
1038 for (unsigned int idx
= 0; idx
< LINEMAPS_ORDINARY_USED (line_table
); idx
++)
1040 source_location end_location
= get_end_location (line_table
, idx
);
1041 /* half-closed: doesn't include this one. */
1043 const line_map_ordinary
*map
1044 = LINEMAPS_ORDINARY_MAP_AT (line_table
, idx
);
1045 fprintf (stream
, "ORDINARY MAP: %i\n", idx
);
1046 dump_location_range (stream
,
1047 MAP_START_LOCATION (map
), end_location
);
1048 fprintf (stream
, " file: %s\n", ORDINARY_MAP_FILE_NAME (map
));
1049 fprintf (stream
, " starting at line: %i\n",
1050 ORDINARY_MAP_STARTING_LINE_NUMBER (map
));
1051 fprintf (stream
, " column and range bits: %i\n",
1052 map
->m_column_and_range_bits
);
1053 fprintf (stream
, " column bits: %i\n",
1054 map
->m_column_and_range_bits
- map
->m_range_bits
);
1055 fprintf (stream
, " range bits: %i\n",
1058 /* Render the span of source lines that this "map" covers. */
1059 for (source_location loc
= MAP_START_LOCATION (map
);
1061 loc
+= (1 << map
->m_range_bits
) )
1063 gcc_assert (pure_location_p (line_table
, loc
) );
1065 expanded_location exploc
1066 = linemap_expand_location (line_table
, map
, loc
);
1068 if (0 == exploc
.column
)
1070 /* Beginning of a new source line: draw the line. */
1073 const char *line_text
= location_get_source_line (exploc
.file
,
1079 "%s:%3i|loc:%5i|%.*s\n",
1080 exploc
.file
, exploc
.line
,
1082 line_size
, line_text
);
1084 /* "loc" is at column 0, which means "the whole line".
1085 Render the locations *within* the line, by underlining
1086 it, showing the source_location numeric values
1088 int max_col
= (1 << map
->m_column_and_range_bits
) - 1;
1089 if (max_col
> line_size
)
1090 max_col
= line_size
+ 1;
1092 int indent
= 14 + strlen (exploc
.file
);
1095 if (end_location
> 999)
1096 write_digit_row (stream
, indent
, map
, loc
, max_col
, 1000);
1099 if (end_location
> 99)
1100 write_digit_row (stream
, indent
, map
, loc
, max_col
, 100);
1103 write_digit_row (stream
, indent
, map
, loc
, max_col
, 10);
1106 write_digit_row (stream
, indent
, map
, loc
, max_col
, 1);
1109 fprintf (stream
, "\n");
1112 /* Visualize unallocated values. */
1113 dump_labelled_location_range (stream
, "UNALLOCATED LOCATIONS",
1114 line_table
->highest_location
,
1115 LINEMAPS_MACRO_LOWEST_LOCATION (line_table
));
1117 /* Visualize the macro line_map instances, rendering the sources. */
1118 for (unsigned int i
= 0; i
< LINEMAPS_MACRO_USED (line_table
); i
++)
1120 /* Each macro map that is allocated owns source_location values
1121 that are *lower* than the one before them.
1122 Hence it's meaningful to view them either in order of ascending
1123 source locations, or in order of ascending macro map index. */
1124 const bool ascending_source_locations
= true;
1125 unsigned int idx
= (ascending_source_locations
1126 ? (LINEMAPS_MACRO_USED (line_table
) - (i
+ 1))
1128 const line_map_macro
*map
= LINEMAPS_MACRO_MAP_AT (line_table
, idx
);
1129 fprintf (stream
, "MACRO %i: %s (%u tokens)\n",
1131 linemap_map_get_macro_name (map
),
1132 MACRO_MAP_NUM_MACRO_TOKENS (map
));
1133 dump_location_range (stream
,
1134 map
->start_location
,
1135 (map
->start_location
1136 + MACRO_MAP_NUM_MACRO_TOKENS (map
)));
1137 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map
),
1138 "expansion point is location %i",
1139 MACRO_MAP_EXPANSION_POINT_LOCATION (map
));
1140 fprintf (stream
, " map->start_location: %u\n",
1141 map
->start_location
);
1143 fprintf (stream
, " macro_locations:\n");
1144 for (unsigned int i
= 0; i
< MACRO_MAP_NUM_MACRO_TOKENS (map
); i
++)
1146 source_location x
= MACRO_MAP_LOCATIONS (map
)[2 * i
];
1147 source_location y
= MACRO_MAP_LOCATIONS (map
)[(2 * i
) + 1];
1149 /* linemap_add_macro_token encodes token numbers in an expansion
1150 by putting them after MAP_START_LOCATION. */
1152 /* I'm typically seeing 4 uninitialized entries at the end of
1154 This appears to be due to macro.c:replace_args
1155 adding 2 extra args for padding tokens; presumably there may
1156 be a leading and/or trailing padding token injected,
1157 each for 2 more location slots.
1158 This would explain there being up to 4 source_locations slots
1159 that may be uninitialized. */
1161 fprintf (stream
, " %u: %u, %u\n",
1167 if (x
< MAP_START_LOCATION (map
))
1168 inform (x
, "token %u has x-location == y-location == %u", i
, x
);
1171 "x-location == y-location == %u encodes token # %u\n",
1172 x
, x
- MAP_START_LOCATION (map
));
1176 inform (x
, "token %u has x-location == %u", i
, x
);
1177 inform (x
, "token %u has y-location == %u", i
, y
);
1180 fprintf (stream
, "\n");
1183 /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1184 macro map, presumably due to an off-by-one error somewhere
1185 between the logic in linemap_enter_macro and
1186 LINEMAPS_MACRO_LOWEST_LOCATION. */
1187 dump_labelled_location_range (stream
, "MAX_SOURCE_LOCATION",
1188 MAX_SOURCE_LOCATION
,
1189 MAX_SOURCE_LOCATION
+ 1);
1191 /* Visualize ad-hoc values. */
1192 dump_labelled_location_range (stream
, "AD-HOC LOCATIONS",
1193 MAX_SOURCE_LOCATION
+ 1, UINT_MAX
);
1196 /* string_concat's constructor. */
1198 string_concat::string_concat (int num
, location_t
*locs
)
1201 m_locs
= ggc_vec_alloc
<location_t
> (num
);
1202 for (int i
= 0; i
< num
; i
++)
1203 m_locs
[i
] = locs
[i
];
1206 /* string_concat_db's constructor. */
1208 string_concat_db::string_concat_db ()
1210 m_table
= hash_map
<location_hash
, string_concat
*>::create_ggc (64);
1213 /* Record that a string concatenation occurred, covering NUM
1214 string literal tokens. LOCS is an array of size NUM, containing the
1215 locations of the tokens. A copy of LOCS is taken. */
1218 string_concat_db::record_string_concatenation (int num
, location_t
*locs
)
1220 gcc_assert (num
> 1);
1223 location_t key_loc
= get_key_loc (locs
[0]);
1225 string_concat
*concat
1226 = new (ggc_alloc
<string_concat
> ()) string_concat (num
, locs
);
1227 m_table
->put (key_loc
, concat
);
1230 /* Determine if LOC was the location of the the initial token of a
1231 concatenation of string literal tokens.
1232 If so, *OUT_NUM is written to with the number of tokens, and
1233 *OUT_LOCS with the location of an array of locations of the
1234 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1235 storage owned by the string_concat_db.
1236 Otherwise, return false. */
1239 string_concat_db::get_string_concatenation (location_t loc
,
1241 location_t
**out_locs
)
1243 gcc_assert (out_num
);
1244 gcc_assert (out_locs
);
1246 location_t key_loc
= get_key_loc (loc
);
1248 string_concat
**concat
= m_table
->get (key_loc
);
1252 *out_num
= (*concat
)->m_num
;
1253 *out_locs
=(*concat
)->m_locs
;
1257 /* Internal function. Canonicalize LOC into a form suitable for
1258 use as a key within the database, stripping away macro expansion,
1259 ad-hoc information, and range information, using the location of
1260 the start of LOC within an ordinary linemap. */
1263 string_concat_db::get_key_loc (location_t loc
)
1265 loc
= linemap_resolve_location (line_table
, loc
, LRK_SPELLING_LOCATION
,
1268 loc
= get_range_from_loc (line_table
, loc
).m_start
;
1273 /* Helper class for use within get_substring_ranges_for_loc.
1274 An vec of cpp_string with responsibility for releasing all of the
1275 str->text for each str in the vector. */
1277 class auto_cpp_string_vec
: public auto_vec
<cpp_string
>
1280 auto_cpp_string_vec (int alloc
)
1281 : auto_vec
<cpp_string
> (alloc
) {}
1283 ~auto_cpp_string_vec ()
1285 /* Clean up the copies within this vec. */
1288 FOR_EACH_VEC_ELT (*this, i
, str
)
1289 free (const_cast <unsigned char *> (str
->text
));
1293 /* Attempt to populate RANGES with source location information on the
1294 individual characters within the string literal found at STRLOC.
1295 If CONCATS is non-NULL, then any string literals that the token at
1296 STRLOC was concatenated with are also added to RANGES.
1298 Return NULL if successful, or an error message if any errors occurred (in
1299 which case RANGES may be only partially populated and should not
1302 This is implemented by re-parsing the relevant source line(s). */
1305 get_substring_ranges_for_loc (cpp_reader
*pfile
,
1306 string_concat_db
*concats
,
1308 enum cpp_ttype type
,
1309 cpp_substring_ranges
&ranges
)
1313 if (strloc
== UNKNOWN_LOCATION
)
1314 return "unknown location";
1316 /* If string concatenation has occurred at STRLOC, get the locations
1317 of all of the literal tokens making up the compound string.
1318 Otherwise, just use STRLOC. */
1320 location_t
*strlocs
= &strloc
;
1322 concats
->get_string_concatenation (strloc
, &num_locs
, &strlocs
);
1324 auto_cpp_string_vec
strs (num_locs
);
1325 auto_vec
<cpp_string_location_reader
> loc_readers (num_locs
);
1326 for (int i
= 0; i
< num_locs
; i
++)
1328 /* Get range of strloc. We will use it to locate the start and finish
1329 of the literal token within the line. */
1330 source_range src_range
= get_range_from_loc (line_table
, strlocs
[i
]);
1332 if (src_range
.m_start
>= LINEMAPS_MACRO_LOWEST_LOCATION (line_table
))
1333 /* If the string is within a macro expansion, we can't get at the
1335 return "macro expansion";
1337 if (src_range
.m_start
>= LINE_MAP_MAX_LOCATION_WITH_COLS
)
1338 /* If so, we can't reliably determine where the token started within
1340 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1342 if (src_range
.m_finish
>= LINE_MAP_MAX_LOCATION_WITH_COLS
)
1343 /* If so, we can't reliably determine where the token finished within
1345 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1347 expanded_location start
1348 = expand_location_to_spelling_point (src_range
.m_start
);
1349 expanded_location finish
1350 = expand_location_to_spelling_point (src_range
.m_finish
);
1351 if (start
.file
!= finish
.file
)
1352 return "range endpoints are in different files";
1353 if (start
.line
!= finish
.line
)
1354 return "range endpoints are on different lines";
1355 if (start
.column
> finish
.column
)
1356 return "range endpoints are reversed";
1359 const char *line
= location_get_source_line (start
.file
, start
.line
,
1362 return "unable to read source line";
1364 /* Determine the location of the literal (including quotes
1365 and leading prefix chars, such as the 'u' in a u""
1367 const char *literal
= line
+ start
.column
- 1;
1368 int literal_length
= finish
.column
- start
.column
+ 1;
1370 gcc_assert (line_width
>= (start
.column
- 1 + literal_length
));
1372 from
.len
= literal_length
;
1373 /* Make a copy of the literal, to avoid having to rely on
1374 the lifetime of the copy of the line within the cache.
1375 This will be released by the auto_cpp_string_vec dtor. */
1376 from
.text
= XDUPVEC (unsigned char, literal
, literal_length
);
1377 strs
.safe_push (from
);
1379 /* For very long lines, a new linemap could have started
1380 halfway through the token.
1381 Ensure that the loc_reader uses the linemap of the
1382 *end* of the token for its start location. */
1383 const line_map_ordinary
*final_ord_map
;
1384 linemap_resolve_location (line_table
, src_range
.m_finish
,
1385 LRK_MACRO_EXPANSION_POINT
, &final_ord_map
);
1386 location_t start_loc
1387 = linemap_position_for_line_and_column (line_table
, final_ord_map
,
1388 start
.line
, start
.column
);
1390 cpp_string_location_reader
loc_reader (start_loc
, line_table
);
1391 loc_readers
.safe_push (loc_reader
);
1394 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1395 const char *err
= cpp_interpret_string_ranges (pfile
, strs
.address (),
1396 loc_readers
.address (),
1397 num_locs
, &ranges
, type
);
1401 /* Success: "ranges" should now contain information on the string. */
1405 /* Attempt to populate *OUT_RANGE with source location information on the
1406 range of given characters within the string literal found at STRLOC.
1407 START_IDX and END_IDX refer to offsets within the execution character
1409 If CONCATS is non-NULL, then any string literals that the token at
1410 STRLOC was concatenated with are also considered.
1412 This is implemented by re-parsing the relevant source line(s).
1414 Return NULL if successful, or an error message if any errors occurred.
1415 Error messages are intended for GCC developers (to help debugging) rather
1416 than for end-users. */
1419 get_source_range_for_substring (cpp_reader
*pfile
,
1420 string_concat_db
*concats
,
1422 enum cpp_ttype type
,
1423 int start_idx
, int end_idx
,
1424 source_range
*out_range
)
1426 gcc_checking_assert (start_idx
>= 0);
1427 gcc_checking_assert (end_idx
>= 0);
1428 gcc_assert (out_range
);
1430 cpp_substring_ranges ranges
;
1432 = get_substring_ranges_for_loc (pfile
, concats
, strloc
, type
, ranges
);
1436 if (start_idx
>= ranges
.get_num_ranges ())
1437 return "start_idx out of range";
1438 if (end_idx
>= ranges
.get_num_ranges ())
1439 return "end_idx out of range";
1441 out_range
->m_start
= ranges
.get_range (start_idx
).m_start
;
1442 out_range
->m_finish
= ranges
.get_range (end_idx
).m_finish
;
1446 /* As get_source_range_for_substring, but write to *OUT the number
1447 of ranges that are available. */
1450 get_num_source_ranges_for_substring (cpp_reader
*pfile
,
1451 string_concat_db
*concats
,
1453 enum cpp_ttype type
,
1458 cpp_substring_ranges ranges
;
1460 = get_substring_ranges_for_loc (pfile
, concats
, strloc
, type
, ranges
);
1465 *out
= ranges
.get_num_ranges ();
1471 namespace selftest
{
1473 /* Selftests of location handling. */
1475 /* A class for writing out a temporary sourcefile for use in selftests
1476 of input handling. */
1478 class temp_source_file
1481 temp_source_file (const location
&loc
, const char *suffix
,
1482 const char *content
);
1483 ~temp_source_file ();
1485 const char *get_filename () const { return m_filename
; }
1491 /* Constructor. Create a tempfile using SUFFIX, and write CONTENT to
1492 it. Abort if anything goes wrong, using LOC as the effective
1493 location in the problem report. */
1495 temp_source_file::temp_source_file (const location
&loc
, const char *suffix
,
1496 const char *content
)
1498 m_filename
= make_temp_file (suffix
);
1499 ASSERT_NE (m_filename
, NULL
);
1501 FILE *out
= fopen (m_filename
, "w");
1503 ::selftest::fail_formatted (loc
, "unable to open tempfile: %s",
1505 fprintf (out
, "%s", content
);
1509 /* Destructor. Delete the tempfile. */
1511 temp_source_file::~temp_source_file ()
1513 unlink (m_filename
);
1517 /* Helper function for verifying location data: when location_t
1518 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1519 as having column 0. */
1522 should_have_column_data_p (location_t loc
)
1524 if (IS_ADHOC_LOC (loc
))
1525 loc
= get_location_from_adhoc_loc (line_table
, loc
);
1526 if (loc
> LINE_MAP_MAX_LOCATION_WITH_COLS
)
1531 /* Selftest for should_have_column_data_p. */
1534 test_should_have_column_data_p ()
1536 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT
));
1538 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS
));
1540 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS
+ 1));
1543 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1547 assert_loceq (const char *exp_filename
, int exp_linenum
, int exp_colnum
,
1550 ASSERT_STREQ (exp_filename
, LOCATION_FILE (loc
));
1551 ASSERT_EQ (exp_linenum
, LOCATION_LINE (loc
));
1552 /* If location_t values are sufficiently high, then column numbers
1553 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1554 When close to the threshold, column numbers *may* be present: if
1555 the final linemap before the threshold contains a line that straddles
1556 the threshold, locations in that line have column information. */
1557 if (should_have_column_data_p (loc
))
1558 ASSERT_EQ (exp_colnum
, LOCATION_COLUMN (loc
));
/* Various selftests in this file involve constructing a line table
   and one or more line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
   - the fallback modes within line-map.c: there are various threshold
   values for source_location/location_t beyond which line-map.c changes
   behavior (disabling of the range-packing optimization, disabling
   of column-tracking).  We can exercise these by starting the line_table
   at interesting values at or near these thresholds.

   The following struct describes one such case: the value to use for
   default_range_bits, and the location at which to start the
   line_table.  */

struct line_table_case
{
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  int m_default_range_bits;
  int m_base_location;
};
1588 /* A class for overriding the global "line_table" within a selftest,
1589 restoring its value afterwards. */
1591 class temp_line_table
1594 temp_line_table (const line_table_case
&);
1595 ~temp_line_table ();
1598 line_maps
*m_old_line_table
;
1601 /* Constructor. Store the old value of line_table, and create a new
1602 one, using the sitation described in CASE_. */
1604 temp_line_table::temp_line_table (const line_table_case
&case_
)
1605 : m_old_line_table (line_table
)
1607 line_table
= ggc_alloc
<line_maps
> ();
1608 linemap_init (line_table
, BUILTINS_LOCATION
);
1609 line_table
->reallocator
= m_old_line_table
->reallocator
;
1610 line_table
->round_alloc_size
= m_old_line_table
->round_alloc_size
;
1611 line_table
->default_range_bits
= case_
.m_default_range_bits
;
1612 if (case_
.m_base_location
)
1614 line_table
->highest_location
= case_
.m_base_location
;
1615 line_table
->highest_line
= case_
.m_base_location
;
1619 /* Destructor. Restore the old value of line_table. */
1621 temp_line_table::~temp_line_table ()
1623 line_table
= m_old_line_table
;
1626 /* Verify basic operation of ordinary linemaps. */
1629 test_accessing_ordinary_linemaps (const line_table_case
&case_
)
1631 temp_line_table
tmp_lt (case_
);
1633 /* Build a simple linemap describing some locations. */
1634 linemap_add (line_table
, LC_ENTER
, false, "foo.c", 0);
1636 linemap_line_start (line_table
, 1, 100);
1637 location_t loc_a
= linemap_position_for_column (line_table
, 1);
1638 location_t loc_b
= linemap_position_for_column (line_table
, 23);
1640 linemap_line_start (line_table
, 2, 100);
1641 location_t loc_c
= linemap_position_for_column (line_table
, 1);
1642 location_t loc_d
= linemap_position_for_column (line_table
, 17);
1644 /* Example of a very long line. */
1645 linemap_line_start (line_table
, 3, 2000);
1646 location_t loc_e
= linemap_position_for_column (line_table
, 700);
1648 linemap_add (line_table
, LC_LEAVE
, false, NULL
, 0);
1650 /* Multiple files. */
1651 linemap_add (line_table
, LC_ENTER
, false, "bar.c", 0);
1652 linemap_line_start (line_table
, 1, 200);
1653 location_t loc_f
= linemap_position_for_column (line_table
, 150);
1654 linemap_add (line_table
, LC_LEAVE
, false, NULL
, 0);
1656 /* Verify that we can recover the location info. */
1657 assert_loceq ("foo.c", 1, 1, loc_a
);
1658 assert_loceq ("foo.c", 1, 23, loc_b
);
1659 assert_loceq ("foo.c", 2, 1, loc_c
);
1660 assert_loceq ("foo.c", 2, 17, loc_d
);
1661 assert_loceq ("foo.c", 3, 700, loc_e
);
1662 assert_loceq ("bar.c", 1, 150, loc_f
);
1664 ASSERT_FALSE (is_location_from_builtin_token (loc_a
));
1665 ASSERT_TRUE (pure_location_p (line_table
, loc_a
));
1667 /* Verify using make_location to build a range, and extracting data
1669 location_t range_c_b_d
= make_location (loc_c
, loc_b
, loc_d
);
1670 ASSERT_FALSE (pure_location_p (line_table
, range_c_b_d
));
1671 ASSERT_EQ (loc_c
, get_location_from_adhoc_loc (line_table
, range_c_b_d
));
1672 source_range src_range
= get_range_from_loc (line_table
, range_c_b_d
);
1673 ASSERT_EQ (loc_b
, src_range
.m_start
);
1674 ASSERT_EQ (loc_d
, src_range
.m_finish
);
1677 /* Verify various properties of UNKNOWN_LOCATION. */
1680 test_unknown_location ()
1682 ASSERT_EQ (NULL
, LOCATION_FILE (UNKNOWN_LOCATION
));
1683 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION
));
1684 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION
));
1687 /* Verify various properties of BUILTINS_LOCATION. */
1692 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION
);
1693 ASSERT_PRED1 (is_location_from_builtin_token
, BUILTINS_LOCATION
);
1696 /* Verify reading of input files (e.g. for caret-based diagnostics). */
1699 test_reading_source_line ()
1701 /* Create a tempfile and write some text to it. */
1702 temp_source_file
tmp (SELFTEST_LOCATION
, ".txt",
1703 "01234567890123456789\n"
1704 "This is the test text\n"
1705 "This is the 3rd line\n");
1707 /* Read back a specific line from the tempfile. */
1709 const char *source_line
= location_get_source_line (tmp
.get_filename (),
1711 ASSERT_TRUE (source_line
!= NULL
);
1712 ASSERT_EQ (21, line_size
);
1713 if (!strncmp ("This is the test text",
1714 source_line
, line_size
))
1715 ::selftest::pass (SELFTEST_LOCATION
,
1716 "source_line matched expected value");
1718 ::selftest::fail (SELFTEST_LOCATION
,
1719 "source_line did not match expected value");
1723 /* Tests of lexing. */
1725 /* Verify that token TOK from PARSER has cpp_token_as_text
1726 equal to EXPECTED_TEXT. */
1728 #define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT) \
1729 SELFTEST_BEGIN_STMT \
1730 unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK)); \
1731 ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt); \
1734 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1735 and ranges from EXP_START_COL to EXP_FINISH_COL.
1736 Use LOC as the effective location of the selftest. */
1739 assert_token_loc_eq (const location
&loc
,
1740 const cpp_token
*tok
,
1741 const char *exp_filename
, int exp_linenum
,
1742 int exp_start_col
, int exp_finish_col
)
1744 location_t tok_loc
= tok
->src_loc
;
1745 ASSERT_STREQ_AT (loc
, exp_filename
, LOCATION_FILE (tok_loc
));
1746 ASSERT_EQ_AT (loc
, exp_linenum
, LOCATION_LINE (tok_loc
));
1748 /* If location_t values are sufficiently high, then column numbers
1749 will be unavailable. */
1750 if (!should_have_column_data_p (tok_loc
))
1753 ASSERT_EQ_AT (loc
, exp_start_col
, LOCATION_COLUMN (tok_loc
));
1754 source_range tok_range
= get_range_from_loc (line_table
, tok_loc
);
1755 ASSERT_EQ_AT (loc
, exp_start_col
, LOCATION_COLUMN (tok_range
.m_start
));
1756 ASSERT_EQ_AT (loc
, exp_finish_col
, LOCATION_COLUMN (tok_range
.m_finish
));
1759 /* Use assert_token_loc_eq to verify the TOK->src_loc, using
1760 SELFTEST_LOCATION as the effective location of the selftest. */
1762 #define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
1763 EXP_START_COL, EXP_FINISH_COL) \
1764 assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
1765 (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1767 /* Test of lexing a file using libcpp, verifying tokens and their
1768 location information. */
1771 test_lexer (const line_table_case
&case_
)
1773 /* Create a tempfile and write some text to it. */
1774 const char *content
=
1775 /*00000000011111111112222222222333333.3333444444444.455555555556
1776 12345678901234567890123456789012345.6789012345678.901234567890. */
1777 ("test_name /* c-style comment */\n"
1778 " \"test literal\"\n"
1779 " // test c++-style comment\n"
1781 temp_source_file
tmp (SELFTEST_LOCATION
, ".txt", content
);
1783 temp_line_table
tmp_lt (case_
);
1785 cpp_reader
*parser
= cpp_create_reader (CLK_GNUC89
, NULL
, line_table
);
1787 const char *fname
= cpp_read_main_file (parser
, tmp
.get_filename ());
1788 ASSERT_NE (fname
, NULL
);
1790 /* Verify that we get the expected tokens back, with the correct
1791 location information. */
1794 const cpp_token
*tok
;
1795 tok
= cpp_get_token_with_location (parser
, &loc
);
1796 ASSERT_NE (tok
, NULL
);
1797 ASSERT_EQ (tok
->type
, CPP_NAME
);
1798 ASSERT_TOKEN_AS_TEXT_EQ (parser
, tok
, "test_name");
1799 ASSERT_TOKEN_LOC_EQ (tok
, tmp
.get_filename (), 1, 1, 9);
1801 tok
= cpp_get_token_with_location (parser
, &loc
);
1802 ASSERT_NE (tok
, NULL
);
1803 ASSERT_EQ (tok
->type
, CPP_STRING
);
1804 ASSERT_TOKEN_AS_TEXT_EQ (parser
, tok
, "\"test literal\"");
1805 ASSERT_TOKEN_LOC_EQ (tok
, tmp
.get_filename (), 2, 35, 48);
1807 tok
= cpp_get_token_with_location (parser
, &loc
);
1808 ASSERT_NE (tok
, NULL
);
1809 ASSERT_EQ (tok
->type
, CPP_NUMBER
);
1810 ASSERT_TOKEN_AS_TEXT_EQ (parser
, tok
, "42");
1811 ASSERT_TOKEN_LOC_EQ (tok
, tmp
.get_filename (), 4, 4, 5);
1813 tok
= cpp_get_token_with_location (parser
, &loc
);
1814 ASSERT_NE (tok
, NULL
);
1815 ASSERT_EQ (tok
->type
, CPP_EOF
);
1817 cpp_finish (parser
, NULL
);
1818 cpp_destroy (parser
);
1821 /* Forward decls. */
1824 class lexer_test_options
;
1826 /* A class for specifying options of a lexer_test.
1827 The "apply" vfunc is called during the lexer_test constructor. */
1829 class lexer_test_options
1832 virtual void apply (lexer_test
&) = 0;
1835 /* A struct for writing lexer tests. */
1839 lexer_test (const line_table_case
&case_
, const char *content
,
1840 lexer_test_options
*options
);
1843 const cpp_token
*get_token ();
1845 temp_source_file m_tempfile
;
1846 temp_line_table m_tmp_lt
;
1847 cpp_reader
*m_parser
;
1848 string_concat_db m_concats
;
1851 /* Use an EBCDIC encoding for the execution charset, specifically
1852 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
1854 This exercises iconv integration within libcpp.
1855 Not every build of iconv supports the given charset,
1856 so we need to flag this error and handle it gracefully. */
1858 class ebcdic_execution_charset
: public lexer_test_options
1861 ebcdic_execution_charset () : m_num_iconv_errors (0)
1863 gcc_assert (s_singleton
== NULL
);
1866 ~ebcdic_execution_charset ()
1868 gcc_assert (s_singleton
== this);
1872 void apply (lexer_test
&test
) FINAL OVERRIDE
1874 cpp_options
*cpp_opts
= cpp_get_options (test
.m_parser
);
1875 cpp_opts
->narrow_charset
= "IBM1047";
1877 cpp_callbacks
*callbacks
= cpp_get_callbacks (test
.m_parser
);
1878 callbacks
->error
= on_error
;
1881 static bool on_error (cpp_reader
*pfile ATTRIBUTE_UNUSED
,
1882 int level ATTRIBUTE_UNUSED
,
1883 int reason ATTRIBUTE_UNUSED
,
1884 rich_location
*richloc ATTRIBUTE_UNUSED
,
1885 const char *msgid
, va_list *ap ATTRIBUTE_UNUSED
)
1886 ATTRIBUTE_FPTR_PRINTF(5,0)
1888 gcc_assert (s_singleton
);
1889 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
1890 when the local iconv build doesn't support the conversion. */
1891 if (strstr (msgid
, "not supported by iconv"))
1893 s_singleton
->m_num_iconv_errors
++;
1897 /* Otherwise, we have an unexpected error. */
1901 bool iconv_errors_occurred_p () const { return m_num_iconv_errors
> 0; }
1904 static ebcdic_execution_charset
*s_singleton
;
1905 int m_num_iconv_errors
;
1908 ebcdic_execution_charset
*ebcdic_execution_charset::s_singleton
;
1910 /* Constructor. Override line_table with a new instance based on CASE_,
1911 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
1912 start parsing the tempfile. */
1914 lexer_test::lexer_test (const line_table_case
&case_
, const char *content
,
1915 lexer_test_options
*options
) :
1916 /* Create a tempfile and write the text to it. */
1917 m_tempfile (SELFTEST_LOCATION
, ".c", content
),
1919 m_parser (cpp_create_reader (CLK_GNUC99
, NULL
, line_table
)),
1923 options
->apply (*this);
1925 cpp_init_iconv (m_parser
);
1927 /* Parse the file. */
1928 const char *fname
= cpp_read_main_file (m_parser
,
1929 m_tempfile
.get_filename ());
1930 ASSERT_NE (fname
, NULL
);
1933 /* Destructor. Verify that the next token in m_parser is EOF. */
1935 lexer_test::~lexer_test ()
1938 const cpp_token
*tok
;
1940 tok
= cpp_get_token_with_location (m_parser
, &loc
);
1941 ASSERT_NE (tok
, NULL
);
1942 ASSERT_EQ (tok
->type
, CPP_EOF
);
1944 cpp_finish (m_parser
, NULL
);
1945 cpp_destroy (m_parser
);
1948 /* Get the next token from m_parser. */
1951 lexer_test::get_token ()
1954 const cpp_token
*tok
;
1956 tok
= cpp_get_token_with_location (m_parser
, &loc
);
1957 ASSERT_NE (tok
, NULL
);
1961 /* Verify that locations within string literals are correctly handled. */
1963 /* Verify get_source_range_for_substring for token(s) at STRLOC,
1964 using the string concatenation database for TEST.
1966 Assert that the character at index IDX is on EXPECTED_LINE,
1967 and that it begins at column EXPECTED_START_COL and ends at
1968 EXPECTED_FINISH_COL (unless the locations are beyond
1969 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
1973 assert_char_at_range (const location
&loc
,
1975 location_t strloc
, enum cpp_ttype type
, int idx
,
1976 int expected_line
, int expected_start_col
,
1977 int expected_finish_col
)
1979 cpp_reader
*pfile
= test
.m_parser
;
1980 string_concat_db
*concats
= &test
.m_concats
;
1982 source_range actual_range
;
1984 = get_source_range_for_substring (pfile
, concats
, strloc
, type
,
1985 idx
, idx
, &actual_range
);
1986 if (should_have_column_data_p (strloc
))
1987 ASSERT_EQ_AT (loc
, NULL
, err
);
1990 ASSERT_STREQ_AT (loc
,
1991 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
1996 int actual_start_line
= LOCATION_LINE (actual_range
.m_start
);
1997 ASSERT_EQ_AT (loc
, expected_line
, actual_start_line
);
1998 int actual_finish_line
= LOCATION_LINE (actual_range
.m_finish
);
1999 ASSERT_EQ_AT (loc
, expected_line
, actual_finish_line
);
2001 if (should_have_column_data_p (actual_range
.m_start
))
2003 int actual_start_col
= LOCATION_COLUMN (actual_range
.m_start
);
2004 ASSERT_EQ_AT (loc
, expected_start_col
, actual_start_col
);
2006 if (should_have_column_data_p (actual_range
.m_finish
))
2008 int actual_finish_col
= LOCATION_COLUMN (actual_range
.m_finish
);
2009 ASSERT_EQ_AT (loc
, expected_finish_col
, actual_finish_col
);
2013 /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2014 the effective location of any errors. */
2016 #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2017 EXPECTED_START_COL, EXPECTED_FINISH_COL) \
2018 assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2019 (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2020 (EXPECTED_FINISH_COL))
2022 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2023 using the string concatenation database for TEST.
2025 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2028 assert_num_substring_ranges (const location
&loc
,
2031 enum cpp_ttype type
,
2032 int expected_num_ranges
)
2034 cpp_reader
*pfile
= test
.m_parser
;
2035 string_concat_db
*concats
= &test
.m_concats
;
2037 int actual_num_ranges
;
2039 = get_num_source_ranges_for_substring (pfile
, concats
, strloc
, type
,
2040 &actual_num_ranges
);
2041 if (should_have_column_data_p (strloc
))
2042 ASSERT_EQ_AT (loc
, NULL
, err
);
2045 ASSERT_STREQ_AT (loc
,
2046 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2050 ASSERT_EQ_AT (loc
, expected_num_ranges
, actual_num_ranges
);
2053 /* Macro for calling assert_num_substring_ranges, supplying
2054 SELFTEST_LOCATION for the effective location of any errors. */
2056 #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2057 EXPECTED_NUM_RANGES) \
2058 assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2059 (TYPE), (EXPECTED_NUM_RANGES))
2062 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2063 returns an error (using the string concatenation database for TEST). */
2066 assert_has_no_substring_ranges (const location
&loc
,
2069 enum cpp_ttype type
,
2070 const char *expected_err
)
2072 cpp_reader
*pfile
= test
.m_parser
;
2073 string_concat_db
*concats
= &test
.m_concats
;
2074 cpp_substring_ranges ranges
;
2075 const char *actual_err
2076 = get_substring_ranges_for_loc (pfile
, concats
, strloc
,
2078 if (should_have_column_data_p (strloc
))
2079 ASSERT_STREQ_AT (loc
, expected_err
, actual_err
);
2081 ASSERT_STREQ_AT (loc
,
2082 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2086 #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
2087 assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2088 (STRLOC), (TYPE), (ERR))
2090 /* Lex a simple string literal. Verify the substring location data, before
2091 and after running cpp_interpret_string on it. */
2094 test_lexer_string_locations_simple (const line_table_case
&case_
)
2096 /* Digits 0-9 (with 0 at column 10), the simple way.
2097 ....................000000000.11111111112.2222222223333333333
2098 ....................123456789.01234567890.1234567890123456789
2099 We add a trailing comment to ensure that we correctly locate
2100 the end of the string literal token. */
2101 const char *content
= " \"0123456789\" /* not a string */\n";
2102 lexer_test
test (case_
, content
, NULL
);
2104 /* Verify that we get the expected token back, with the correct
2105 location information. */
2106 const cpp_token
*tok
= test
.get_token ();
2107 ASSERT_EQ (tok
->type
, CPP_STRING
);
2108 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "\"0123456789\"");
2109 ASSERT_TOKEN_LOC_EQ (tok
, test
.m_tempfile
.get_filename (), 1, 9, 20);
2111 /* At this point in lexing, the quote characters are treated as part of
2112 the string (they are stripped off by cpp_interpret_string). */
2114 ASSERT_EQ (tok
->val
.str
.len
, 12);
2116 /* Verify that cpp_interpret_string works. */
2117 cpp_string dst_string
;
2118 const enum cpp_ttype type
= CPP_STRING
;
2119 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
2121 ASSERT_TRUE (result
);
2122 ASSERT_STREQ ("0123456789", (const char *)dst_string
.text
);
2123 free (const_cast <unsigned char *> (dst_string
.text
));
2125 /* Verify ranges of individual characters. This no longer includes the
2127 for (int i
= 0; i
<= 9; i
++)
2128 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1,
2131 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, type
, 10);
2134 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2138 test_lexer_string_locations_ebcdic (const line_table_case
&case_
)
2140 /* EBCDIC support requires iconv. */
2144 /* Digits 0-9 (with 0 at column 10), the simple way.
2145 ....................000000000.11111111112.2222222223333333333
2146 ....................123456789.01234567890.1234567890123456789
2147 We add a trailing comment to ensure that we correctly locate
2148 the end of the string literal token. */
2149 const char *content
= " \"0123456789\" /* not a string */\n";
2150 ebcdic_execution_charset use_ebcdic
;
2151 lexer_test
test (case_
, content
, &use_ebcdic
);
2153 /* Verify that we get the expected token back, with the correct
2154 location information. */
2155 const cpp_token
*tok
= test
.get_token ();
2156 ASSERT_EQ (tok
->type
, CPP_STRING
);
2157 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "\"0123456789\"");
2158 ASSERT_TOKEN_LOC_EQ (tok
, test
.m_tempfile
.get_filename (), 1, 9, 20);
2160 /* At this point in lexing, the quote characters are treated as part of
2161 the string (they are stripped off by cpp_interpret_string). */
2163 ASSERT_EQ (tok
->val
.str
.len
, 12);
2165 /* The remainder of the test requires an iconv implementation that
2166 can convert from UTF-8 to the EBCDIC encoding requested above. */
2167 if (use_ebcdic
.iconv_errors_occurred_p ())
2170 /* Verify that cpp_interpret_string works. */
2171 cpp_string dst_string
;
2172 const enum cpp_ttype type
= CPP_STRING
;
2173 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
2175 ASSERT_TRUE (result
);
2176 /* We should now have EBCDIC-encoded text, specifically
2177 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2178 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2179 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2180 (const char *)dst_string
.text
);
2181 free (const_cast <unsigned char *> (dst_string
.text
));
2183 /* Verify that we don't attempt to record substring location information
2185 ASSERT_HAS_NO_SUBSTRING_RANGES
2186 (test
, tok
->src_loc
, type
,
2187 "execution character set != source character set");
2190 /* Lex a string literal containing a hex-escaped character.
2191 Verify the substring location data, before and after running
2192 cpp_interpret_string on it. */
2195 test_lexer_string_locations_hex (const line_table_case
&case_
)
2197 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2198 and with a space in place of digit 6, to terminate the escaped
2200 ....................000000000.111111.11112222.
2201 ....................123456789.012345.67890123. */
2202 const char *content
= " \"01234\\x35 789\"\n";
2203 lexer_test
test (case_
, content
, NULL
);
2205 /* Verify that we get the expected token back, with the correct
2206 location information. */
2207 const cpp_token
*tok
= test
.get_token ();
2208 ASSERT_EQ (tok
->type
, CPP_STRING
);
2209 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "\"01234\\x35 789\"");
2210 ASSERT_TOKEN_LOC_EQ (tok
, test
.m_tempfile
.get_filename (), 1, 9, 23);
2212 /* At this point in lexing, the quote characters are treated as part of
2213 the string (they are stripped off by cpp_interpret_string). */
2214 ASSERT_EQ (tok
->val
.str
.len
, 15);
2216 /* Verify that cpp_interpret_string works. */
2217 cpp_string dst_string
;
2218 const enum cpp_ttype type
= CPP_STRING
;
2219 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
2221 ASSERT_TRUE (result
);
2222 ASSERT_STREQ ("012345 789", (const char *)dst_string
.text
);
2223 free (const_cast <unsigned char *> (dst_string
.text
));
2225 /* Verify ranges of individual characters. This no longer includes the
2227 for (int i
= 0; i
<= 4; i
++)
2228 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 10 + i
, 10 + i
);
2229 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, 5, 1, 15, 18);
2230 for (int i
= 6; i
<= 9; i
++)
2231 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 13 + i
, 13 + i
);
2233 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, type
, 10);
2236 /* Lex a string literal containing an octal-escaped character.
2237 Verify the substring location data after running cpp_interpret_string
2241 test_lexer_string_locations_oct (const line_table_case
&case_
)
2243 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2244 and with a space in place of digit 6, to terminate the escaped
2246 ....................000000000.111111.11112222.2222223333333333444
2247 ....................123456789.012345.67890123.4567890123456789012 */
2248 const char *content
= " \"01234\\065 789\" /* not a string */\n";
2249 lexer_test
test (case_
, content
, NULL
);
2251 /* Verify that we get the expected token back, with the correct
2252 location information. */
2253 const cpp_token
*tok
= test
.get_token ();
2254 ASSERT_EQ (tok
->type
, CPP_STRING
);
2255 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "\"01234\\065 789\"");
2257 /* Verify that cpp_interpret_string works. */
2258 cpp_string dst_string
;
2259 const enum cpp_ttype type
= CPP_STRING
;
2260 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
2262 ASSERT_TRUE (result
);
2263 ASSERT_STREQ ("012345 789", (const char *)dst_string
.text
);
2264 free (const_cast <unsigned char *> (dst_string
.text
));
2266 /* Verify ranges of individual characters. This no longer includes the
2268 for (int i
= 0; i
< 5; i
++)
2269 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 10 + i
, 10 + i
);
2270 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, 5, 1, 15, 18);
2271 for (int i
= 6; i
<= 9; i
++)
2272 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 13 + i
, 13 + i
);
2274 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, type
, 10);
2277 /* Test of string literal containing letter escapes. */
2280 test_lexer_string_locations_letter_escape_1 (const line_table_case
&case_
)
2282 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2283 .....................000000000.1.11111.1.1.11222.22222223333333
2284 .....................123456789.0.12345.6.7.89012.34567890123456. */
2285 const char *content
= (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2286 lexer_test
test (case_
, content
, NULL
);
2288 /* Verify that we get the expected tokens back. */
2289 const cpp_token
*tok
= test
.get_token ();
2290 ASSERT_EQ (tok
->type
, CPP_STRING
);
2291 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "\"\\tfoo\\\\\\nbar\"");
2293 /* Verify ranges of individual characters. */
2295 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
2298 for (int i
= 1; i
<= 3; i
++)
2299 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
2300 i
, 1, 11 + i
, 11 + i
);
2301 /* "\\" and "\n". */
2302 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
2304 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
2308 for (int i
= 6; i
<= 8; i
++)
2309 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
2310 i
, 1, 13 + i
, 13 + i
);
2312 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, CPP_STRING
, 9);
2315 /* Another test of a string literal containing a letter escape.
2316 Based on string seen in
2318 in gcc.dg/format/c90-printf-1.c. */
2321 test_lexer_string_locations_letter_escape_2 (const line_table_case
&case_
)
2323 /* .....................000000000.1111.11.1111.22222222223.
2324 .....................123456789.0123.45.6789.01234567890. */
2325 const char *content
= (" \"%-%\\n\" /* non-str */\n");
2326 lexer_test
test (case_
, content
, NULL
);
2328 /* Verify that we get the expected tokens back. */
2329 const cpp_token
*tok
= test
.get_token ();
2330 ASSERT_EQ (tok
->type
, CPP_STRING
);
2331 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "\"%-%\\n\"");
2333 /* Verify ranges of individual characters. */
2335 for (int i
= 0; i
< 3; i
++)
2336 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
2337 i
, 1, 10 + i
, 10 + i
);
2339 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
2342 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, CPP_STRING
, 4);
2345 /* Lex a string literal containing UCN 4 characters.
2346 Verify the substring location data after running cpp_interpret_string
2350 test_lexer_string_locations_ucn4 (const line_table_case
&case_
)
2352 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2354 ....................000000000.111111.111122.222222223.33333333344444
2355 ....................123456789.012345.678901.234567890.12345678901234 */
2356 const char *content
= " \"01234\\u2174\\u2175789\" /* non-str */\n";
2357 lexer_test
test (case_
, content
, NULL
);
2359 /* Verify that we get the expected token back, with the correct
2360 location information. */
2361 const cpp_token
*tok
= test
.get_token ();
2362 ASSERT_EQ (tok
->type
, CPP_STRING
);
2363 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "\"01234\\u2174\\u2175789\"");
2365 /* Verify that cpp_interpret_string works.
2366 The string should be encoded in the execution character
2367 set. Assuming that that is UTF-8, we should have the following:
2368 ----------- ---- ----- ------- ----------------
2369 Byte offset Byte Octal Unicode Source Column(s)
2370 ----------- ---- ----- ------- ----------------
2376 5 0xE2 \342 U+2174 15-20
2377 6 0x85 \205 (cont) 15-20
2378 7 0xB4 \264 (cont) 15-20
2379 8 0xE2 \342 U+2175 21-26
2380 9 0x85 \205 (cont) 21-26
2381 10 0xB5 \265 (cont) 21-26
2385 ----------- ---- ----- ------- ---------------. */
2387 cpp_string dst_string
;
2388 const enum cpp_ttype type
= CPP_STRING
;
2389 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
2391 ASSERT_TRUE (result
);
2392 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2393 (const char *)dst_string
.text
);
2394 free (const_cast <unsigned char *> (dst_string
.text
));
2396 /* Verify ranges of individual characters. This no longer includes the
2399 for (int i
= 0; i
<= 4; i
++)
2400 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 10 + i
, 10 + i
);
2402 for (int i
= 5; i
<= 7; i
++)
2403 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 15, 20);
2405 for (int i
= 8; i
<= 10; i
++)
2406 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 21, 26);
2408 for (int i
= 11; i
<= 13; i
++)
2409 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 16 + i
, 16 + i
);
2411 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, type
, 14);
2414 /* Lex a string literal containing UCN 8 characters.
2415 Verify the substring location data after running cpp_interpret_string
2419 test_lexer_string_locations_ucn8 (const line_table_case
&case_
)
2421 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2422 ....................000000000.111111.1111222222.2222333333333.344444
2423 ....................123456789.012345.6789012345.6789012345678.901234 */
2424 const char *content
= " \"01234\\U00002174\\U00002175789\" /* */\n";
2425 lexer_test
test (case_
, content
, NULL
);
2427 /* Verify that we get the expected token back, with the correct
2428 location information. */
2429 const cpp_token
*tok
= test
.get_token ();
2430 ASSERT_EQ (tok
->type
, CPP_STRING
);
2431 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
,
2432 "\"01234\\U00002174\\U00002175789\"");
2434 /* Verify that cpp_interpret_string works.
2435 The UTF-8 encoding of the string is identical to that from
2436 the ucn4 testcase above; the only difference is the column
2438 cpp_string dst_string
;
2439 const enum cpp_ttype type
= CPP_STRING
;
2440 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
2442 ASSERT_TRUE (result
);
2443 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2444 (const char *)dst_string
.text
);
2445 free (const_cast <unsigned char *> (dst_string
.text
));
2447 /* Verify ranges of individual characters. This no longer includes the
2450 for (int i
= 0; i
<= 4; i
++)
2451 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 10 + i
, 10 + i
);
2453 for (int i
= 5; i
<= 7; i
++)
2454 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 15, 24);
2456 for (int i
= 8; i
<= 10; i
++)
2457 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 25, 34);
2458 /* '789' at columns 35-37 */
2459 for (int i
= 11; i
<= 13; i
++)
2460 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 24 + i
, 24 + i
);
2462 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, type
, 14);
/* Decode the 32-bit big-endian quantity stored at PTR_BE_VALUE and
   return it in host byte order.  The storage is read one byte at a
   time through an unsigned char pointer, so the result is the same
   whatever the endianness of the host.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t value = 0;

  /* Accumulate from the most significant byte down to the least.  */
  for (int idx = 0; idx < 4; idx++)
    value = (value << 8) | bytes[idx];

  return value;
}
2477 /* Lex a wide string literal and verify that attempts to read substring
2478 location data from it fail gracefully. */
2481 test_lexer_string_locations_wide_string (const line_table_case
&case_
)
2484 ....................000000000.11111111112.22222222233333
2485 ....................123456789.01234567890.12345678901234 */
2486 const char *content
= " L\"0123456789\" /* non-str */\n";
2487 lexer_test
test (case_
, content
, NULL
);
2489 /* Verify that we get the expected token back, with the correct
2490 location information. */
2491 const cpp_token
*tok
= test
.get_token ();
2492 ASSERT_EQ (tok
->type
, CPP_WSTRING
);
2493 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "L\"0123456789\"");
2495 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2496 cpp_string dst_string
;
2497 const enum cpp_ttype type
= CPP_WSTRING
;
2498 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
2500 ASSERT_TRUE (result
);
2501 /* The cpp_reader defaults to big-endian with
2502 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2503 now be encoded as UTF-32BE. */
2504 const uint32_t *be32_chars
= (const uint32_t *)dst_string
.text
;
2505 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars
[0]));
2506 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars
[5]));
2507 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars
[9]));
2508 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars
[10]));
2509 free (const_cast <unsigned char *> (dst_string
.text
));
2511 /* We don't yet support generating substring location information
2513 ASSERT_HAS_NO_SUBSTRING_RANGES
2514 (test
, tok
->src_loc
, type
,
2515 "execution character set != source character set");
/* Decode the 16-bit big-endian quantity stored at PTR_BE_VALUE and
   return it in host byte order.  The two bytes are read individually
   through an unsigned char pointer, so the result is the same
   whatever the endianness of the host.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const uint16_t high = bytes[0];
  const uint16_t low = bytes[1];

  /* The first byte in memory is the most significant one.  */
  return (uint16_t) ((high << 8) | low);
}
2527 /* Lex a u"" string literal and verify that attempts to read substring
2528 location data from it fail gracefully. */
2531 test_lexer_string_locations_string16 (const line_table_case
&case_
)
2534 ....................000000000.11111111112.22222222233333
2535 ....................123456789.01234567890.12345678901234 */
2536 const char *content
= " u\"0123456789\" /* non-str */\n";
2537 lexer_test
test (case_
, content
, NULL
);
2539 /* Verify that we get the expected token back, with the correct
2540 location information. */
2541 const cpp_token
*tok
= test
.get_token ();
2542 ASSERT_EQ (tok
->type
, CPP_STRING16
);
2543 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "u\"0123456789\"");
2545 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2546 cpp_string dst_string
;
2547 const enum cpp_ttype type
= CPP_STRING16
;
2548 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
2550 ASSERT_TRUE (result
);
2552 /* The cpp_reader defaults to big-endian, so dst_string should
2553 now be encoded as UTF-16BE. */
2554 const uint16_t *be16_chars
= (const uint16_t *)dst_string
.text
;
2555 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars
[0]));
2556 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars
[5]));
2557 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars
[9]));
2558 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars
[10]));
2559 free (const_cast <unsigned char *> (dst_string
.text
));
2561 /* We don't yet support generating substring location information
2563 ASSERT_HAS_NO_SUBSTRING_RANGES
2564 (test
, tok
->src_loc
, type
,
2565 "execution character set != source character set");
2568 /* Lex a U"" string literal and verify that attempts to read substring
2569 location data from it fail gracefully. */
2572 test_lexer_string_locations_string32 (const line_table_case
&case_
)
2575 ....................000000000.11111111112.22222222233333
2576 ....................123456789.01234567890.12345678901234 */
2577 const char *content
= " U\"0123456789\" /* non-str */\n";
2578 lexer_test
test (case_
, content
, NULL
);
2580 /* Verify that we get the expected token back, with the correct
2581 location information. */
2582 const cpp_token
*tok
= test
.get_token ();
2583 ASSERT_EQ (tok
->type
, CPP_STRING32
);
2584 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "U\"0123456789\"");
2586 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2587 cpp_string dst_string
;
2588 const enum cpp_ttype type
= CPP_STRING32
;
2589 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
2591 ASSERT_TRUE (result
);
2593 /* The cpp_reader defaults to big-endian, so dst_string should
2594 now be encoded as UTF-32BE. */
2595 const uint32_t *be32_chars
= (const uint32_t *)dst_string
.text
;
2596 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars
[0]));
2597 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars
[5]));
2598 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars
[9]));
2599 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars
[10]));
2600 free (const_cast <unsigned char *> (dst_string
.text
));
2602 /* We don't yet support generating substring location information
2604 ASSERT_HAS_NO_SUBSTRING_RANGES
2605 (test
, tok
->src_loc
, type
,
2606 "execution character set != source character set");
2609 /* Lex a u8-string literal.
2610 Verify the substring location data after running cpp_interpret_string
2614 test_lexer_string_locations_u8 (const line_table_case
&case_
)
2617 ....................000000000.11111111112.22222222233333
2618 ....................123456789.01234567890.12345678901234 */
2619 const char *content
= " u8\"0123456789\" /* non-str */\n";
2620 lexer_test
test (case_
, content
, NULL
);
2622 /* Verify that we get the expected token back, with the correct
2623 location information. */
2624 const cpp_token
*tok
= test
.get_token ();
2625 ASSERT_EQ (tok
->type
, CPP_UTF8STRING
);
2626 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "u8\"0123456789\"");
2628 /* Verify that cpp_interpret_string works. */
2629 cpp_string dst_string
;
2630 const enum cpp_ttype type
= CPP_STRING
;
2631 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
2633 ASSERT_TRUE (result
);
2634 ASSERT_STREQ ("0123456789", (const char *)dst_string
.text
);
2635 free (const_cast <unsigned char *> (dst_string
.text
));
2637 /* Verify ranges of individual characters. This no longer includes the
2639 for (int i
= 0; i
<= 9; i
++)
2640 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 10 + i
, 10 + i
);
2643 /* Lex a string literal containing UTF-8 source characters.
2644 Verify the substring location data after running cpp_interpret_string
2648 test_lexer_string_locations_utf8_source (const line_table_case
&case_
)
2650 /* This string literal is written out to the source file as UTF-8,
2651 and is of the form "before mojibake after", where "mojibake"
2652 is written as the following four unicode code points:
2653 U+6587 CJK UNIFIED IDEOGRAPH-6587
2654 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2655 U+5316 CJK UNIFIED IDEOGRAPH-5316
2656 U+3051 HIRAGANA LETTER KE.
2657 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2658 "before" and "after" are 1 byte per Unicode character.
2660 The numbers shown are "columns", which are *byte* numbers within
2661 the line, rather than Unicode character numbers.
2663 .................... 000000000.1111111.
2664 .................... 123456789.0123456. */
2665 const char *content
= (" \"before "
2666 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2667 UTF-8: 0xE6 0x96 0x87
2668 C octal escaped UTF-8: \346\226\207
2669 "column" numbers: 17-19. */
2672 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2673 UTF-8: 0xE5 0xAD 0x97
2674 C octal escaped UTF-8: \345\255\227
2675 "column" numbers: 20-22. */
2678 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2679 UTF-8: 0xE5 0x8C 0x96
2680 C octal escaped UTF-8: \345\214\226
2681 "column" numbers: 23-25. */
2684 /* U+3051 HIRAGANA LETTER KE
2685 UTF-8: 0xE3 0x81 0x91
2686 C octal escaped UTF-8: \343\201\221
2687 "column" numbers: 26-28. */
2690 /* column numbers 29 onwards
2691 2333333.33334444444444
2692 9012345.67890123456789. */
2693 " after\" /* non-str */\n");
2694 lexer_test
test (case_
, content
, NULL
);
2696 /* Verify that we get the expected token back, with the correct
2697 location information. */
2698 const cpp_token
*tok
= test
.get_token ();
2699 ASSERT_EQ (tok
->type
, CPP_STRING
);
2700 ASSERT_TOKEN_AS_TEXT_EQ
2701 (test
.m_parser
, tok
,
2702 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2704 /* Verify that cpp_interpret_string works. */
2705 cpp_string dst_string
;
2706 const enum cpp_ttype type
= CPP_STRING
;
2707 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
2709 ASSERT_TRUE (result
);
2711 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2712 (const char *)dst_string
.text
);
2713 free (const_cast <unsigned char *> (dst_string
.text
));
2715 /* Verify ranges of individual characters. This no longer includes the
2717 Assuming that both source and execution encodings are UTF-8, we have
2718 a run of 25 octets in each. */
2719 for (int i
= 0; i
< 25; i
++)
2720 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 10 + i
, 10 + i
);
2722 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, type
, 25);
2725 /* Test of string literal concatenation. */
2728 test_lexer_string_locations_concatenation_1 (const line_table_case
&case_
)
2731 .....................000000000.111111.11112222222222
2732 .....................123456789.012345.67890123456789. */
2733 const char *content
= (" \"01234\" /* non-str */\n"
2734 " \"56789\" /* non-str */\n");
2735 lexer_test
test (case_
, content
, NULL
);
2737 location_t input_locs
[2];
2739 /* Verify that we get the expected tokens back. */
2740 auto_vec
<cpp_string
> input_strings
;
2741 const cpp_token
*tok_a
= test
.get_token ();
2742 ASSERT_EQ (tok_a
->type
, CPP_STRING
);
2743 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok_a
, "\"01234\"");
2744 input_strings
.safe_push (tok_a
->val
.str
);
2745 input_locs
[0] = tok_a
->src_loc
;
2747 const cpp_token
*tok_b
= test
.get_token ();
2748 ASSERT_EQ (tok_b
->type
, CPP_STRING
);
2749 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok_b
, "\"56789\"");
2750 input_strings
.safe_push (tok_b
->val
.str
);
2751 input_locs
[1] = tok_b
->src_loc
;
2753 /* Verify that cpp_interpret_string works. */
2754 cpp_string dst_string
;
2755 const enum cpp_ttype type
= CPP_STRING
;
2756 bool result
= cpp_interpret_string (test
.m_parser
,
2757 input_strings
.address (), 2,
2759 ASSERT_TRUE (result
);
2760 ASSERT_STREQ ("0123456789", (const char *)dst_string
.text
);
2761 free (const_cast <unsigned char *> (dst_string
.text
));
2763 /* Simulate c-lex.c's lex_string in order to record concatenation. */
2764 test
.m_concats
.record_string_concatenation (2, input_locs
);
2766 location_t initial_loc
= input_locs
[0];
2768 for (int i
= 0; i
<= 4; i
++)
2769 ASSERT_CHAR_AT_RANGE (test
, initial_loc
, type
, i
, 1, 10 + i
, 10 + i
);
2770 for (int i
= 5; i
<= 9; i
++)
2771 ASSERT_CHAR_AT_RANGE (test
, initial_loc
, type
, i
, 2, 5 + i
, 5 + i
);
2773 ASSERT_NUM_SUBSTRING_RANGES (test
, initial_loc
, type
, 10);
2776 /* Another test of string literal concatenation. */
2779 test_lexer_string_locations_concatenation_2 (const line_table_case
&case_
)
2782 .....................000000000.111.11111112222222
2783 .....................123456789.012.34567890123456. */
2784 const char *content
= (" \"01\" /* non-str */\n"
2785 " \"23\" /* non-str */\n"
2786 " \"45\" /* non-str */\n"
2787 " \"67\" /* non-str */\n"
2788 " \"89\" /* non-str */\n");
2789 lexer_test
test (case_
, content
, NULL
);
2791 auto_vec
<cpp_string
> input_strings
;
2792 location_t input_locs
[5];
2794 /* Verify that we get the expected tokens back. */
2795 for (int i
= 0; i
< 5; i
++)
2797 const cpp_token
*tok
= test
.get_token ();
2798 ASSERT_EQ (tok
->type
, CPP_STRING
);
2799 input_strings
.safe_push (tok
->val
.str
);
2800 input_locs
[i
] = tok
->src_loc
;
2803 /* Verify that cpp_interpret_string works. */
2804 cpp_string dst_string
;
2805 const enum cpp_ttype type
= CPP_STRING
;
2806 bool result
= cpp_interpret_string (test
.m_parser
,
2807 input_strings
.address (), 5,
2809 ASSERT_TRUE (result
);
2810 ASSERT_STREQ ("0123456789", (const char *)dst_string
.text
);
2811 free (const_cast <unsigned char *> (dst_string
.text
));
2813 /* Simulate c-lex.c's lex_string in order to record concatenation. */
2814 test
.m_concats
.record_string_concatenation (5, input_locs
);
2816 location_t initial_loc
= input_locs
[0];
2818 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
2819 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
2820 and expect get_source_range_for_substring to fail.
2821 However, for a string concatenation test, we can have a case
2822 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
2823 but subsequent strings can be after it.
2824 Attempting to detect this within assert_char_at_range
2825 would overcomplicate the logic for the common test cases, so
2826 we detect it here. */
2827 if (should_have_column_data_p (input_locs
[0])
2828 && !should_have_column_data_p (input_locs
[4]))
2830 /* Verify that get_source_range_for_substring gracefully rejects
2832 source_range actual_range
;
2834 = get_source_range_for_substring (test
.m_parser
, &test
.m_concats
,
2835 initial_loc
, type
, 0, 0,
2837 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err
);
2841 for (int i
= 0; i
< 5; i
++)
2842 for (int j
= 0; j
< 2; j
++)
2843 ASSERT_CHAR_AT_RANGE (test
, initial_loc
, type
, (i
* 2) + j
,
2844 i
+ 1, 10 + j
, 10 + j
);
2846 ASSERT_NUM_SUBSTRING_RANGES (test
, initial_loc
, type
, 10);
2849 /* Another test of string literal concatenation, this time combined with
2850 various kinds of escaped characters. */
2853 test_lexer_string_locations_concatenation_3 (const line_table_case
&case_
)
2855 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
2856 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
2858 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
2859 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
2860 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
2861 lexer_test
test (case_
, content
, NULL
);
2863 auto_vec
<cpp_string
> input_strings
;
2864 location_t input_locs
[4];
2866 /* Verify that we get the expected tokens back. */
2867 for (int i
= 0; i
< 4; i
++)
2869 const cpp_token
*tok
= test
.get_token ();
2870 ASSERT_EQ (tok
->type
, CPP_STRING
);
2871 input_strings
.safe_push (tok
->val
.str
);
2872 input_locs
[i
] = tok
->src_loc
;
2875 /* Verify that cpp_interpret_string works. */
2876 cpp_string dst_string
;
2877 const enum cpp_ttype type
= CPP_STRING
;
2878 bool result
= cpp_interpret_string (test
.m_parser
,
2879 input_strings
.address (), 4,
2881 ASSERT_TRUE (result
);
2882 ASSERT_STREQ ("0123456789", (const char *)dst_string
.text
);
2883 free (const_cast <unsigned char *> (dst_string
.text
));
2885 /* Simulate c-lex.c's lex_string in order to record concatenation. */
2886 test
.m_concats
.record_string_concatenation (4, input_locs
);
2888 location_t initial_loc
= input_locs
[0];
2890 for (int i
= 0; i
<= 4; i
++)
2891 ASSERT_CHAR_AT_RANGE (test
, initial_loc
, type
, i
, 1, 10 + i
, 10 + i
);
2892 ASSERT_CHAR_AT_RANGE (test
, initial_loc
, type
, 5, 1, 19, 22);
2893 ASSERT_CHAR_AT_RANGE (test
, initial_loc
, type
, 6, 1, 27, 30);
2894 for (int i
= 7; i
<= 9; i
++)
2895 ASSERT_CHAR_AT_RANGE (test
, initial_loc
, type
, i
, 1, 28 + i
, 28 + i
);
2897 ASSERT_NUM_SUBSTRING_RANGES (test
, initial_loc
, type
, 10);
2900 /* Test of string literal in a macro. */
2903 test_lexer_string_locations_macro (const line_table_case
&case_
)
2906 .....................0000000001111111111.22222222223.
2907 .....................1234567890123456789.01234567890. */
2908 const char *content
= ("#define MACRO \"0123456789\" /* non-str */\n"
2910 lexer_test
test (case_
, content
, NULL
);
2912 /* Verify that we get the expected tokens back. */
2913 const cpp_token
*tok
= test
.get_token ();
2914 ASSERT_EQ (tok
->type
, CPP_PADDING
);
2916 tok
= test
.get_token ();
2917 ASSERT_EQ (tok
->type
, CPP_STRING
);
2918 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "\"0123456789\"");
2920 /* Verify ranges of individual characters. We ought to
2921 see columns within the macro definition. */
2922 for (int i
= 0; i
<= 9; i
++)
2923 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
2924 i
, 1, 20 + i
, 20 + i
);
2926 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, CPP_STRING
, 10);
2928 tok
= test
.get_token ();
2929 ASSERT_EQ (tok
->type
, CPP_PADDING
);
2932 /* Test of stringification of a macro argument. */
2935 test_lexer_string_locations_stringified_macro_argument
2936 (const line_table_case
&case_
)
2938 /* .....................000000000111111111122222222223.
2939 .....................123456789012345678901234567890. */
2940 const char *content
= ("#define MACRO(X) #X /* non-str */\n"
2942 lexer_test
test (case_
, content
, NULL
);
2944 /* Verify that we get the expected token back. */
2945 const cpp_token
*tok
= test
.get_token ();
2946 ASSERT_EQ (tok
->type
, CPP_PADDING
);
2948 tok
= test
.get_token ();
2949 ASSERT_EQ (tok
->type
, CPP_STRING
);
2950 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "\"foo\"");
2952 /* We don't support getting the location of a stringified macro
2953 argument. Verify that it fails gracefully. */
2954 ASSERT_HAS_NO_SUBSTRING_RANGES (test
, tok
->src_loc
, CPP_STRING
,
2955 "cpp_interpret_string_1 failed");
2957 tok
= test
.get_token ();
2958 ASSERT_EQ (tok
->type
, CPP_PADDING
);
2960 tok
= test
.get_token ();
2961 ASSERT_EQ (tok
->type
, CPP_PADDING
);
2964 /* Ensure that we fail gracefully if something attempts to pass
2965 in a location that isn't a string literal token. Seen on this code:
2967 const char a[] = " %d ";
2968 __builtin_printf (a, 0.5);
2971 when c-format.c erroneously used the indicated one-character
2972 location as the format string location, leading to a read past the
2973 end of a string buffer in cpp_interpret_string_1. */
2976 test_lexer_string_locations_non_string (const line_table_case
&case_
)
2978 /* .....................000000000111111111122222222223.
2979 .....................123456789012345678901234567890. */
2980 const char *content
= (" a\n");
2981 lexer_test
test (case_
, content
, NULL
);
2983 /* Verify that we get the expected token back. */
2984 const cpp_token
*tok
= test
.get_token ();
2985 ASSERT_EQ (tok
->type
, CPP_NAME
);
2986 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "a");
2988 /* At this point, libcpp is attempting to interpret the name as a
2989 string literal, despite it not starting with a quote. We don't detect
2990 that, but we should at least fail gracefully. */
2991 ASSERT_HAS_NO_SUBSTRING_RANGES (test
, tok
->src_loc
, CPP_STRING
,
2992 "cpp_interpret_string_1 failed");
2995 /* Ensure that we can read substring information for a token which
2996 starts in one linemap and ends in another. Adapted from
2997 gcc.dg/cpp/pr69985.c. */
3000 test_lexer_string_locations_long_line (const line_table_case
&case_
)
3002 /* .....................000000.0001111111111
3003 .....................123456.7890123456789. */
3004 const char *content
= ("/* A very long line, so that we start a new line map. */\n"
3005 " \"0123456789012345678901234567890123456789"
3006 "0123456789012345678901234567890123456789"
3007 "0123456789012345678901234567890123456789"
3010 lexer_test
test (case_
, content
, NULL
);
3012 /* Verify that we get the expected token back. */
3013 const cpp_token
*tok
= test
.get_token ();
3014 ASSERT_EQ (tok
->type
, CPP_STRING
);
3016 if (!should_have_column_data_p (line_table
->highest_location
))
3019 /* Verify ranges of individual characters. */
3020 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, CPP_STRING
, 130);
3021 for (int i
= 0; i
< 130; i
++)
3022 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
3023 i
, 2, 7 + i
, 7 + i
);
3026 /* Test of lexing char constants: lex CONTENT for the line_table case
   CASE_ and check the type and spelling of each token returned, plus
   the interpreted value of the first one.  */
3029 test_lexer_char_constants (const line_table_case
&case_
)
3031 /* Various char constants.
3032 .....................0000000001111111111.22222222223.
3033 .....................1234567890123456789.01234567890. */
3034 const char *content
= (" 'a'\n"
/* Hand CONTENT to a lexer_test fixture built for CASE_.  */
3039 lexer_test
test (case_
, content
, NULL
);
3041 /* Verify that we get the expected tokens back. */
/* First token: must be a CPP_CHAR spelled 'a'.  */
3043 const cpp_token
*tok
= test
.get_token ();
3044 ASSERT_EQ (tok
->type
, CPP_CHAR
);
3045 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "'a'");
/* Interpret that constant with cpp_interpret_charconst: the value must
   be 'a' and CHARS_SEEN must come back as 1.  */
3047 unsigned int chars_seen
;
3049 cppchar_t cc
= cpp_interpret_charconst (test
.m_parser
, tok
,
3050 &chars_seen
, &unsignedp
);
3051 ASSERT_EQ (cc
, 'a');
3052 ASSERT_EQ (chars_seen
, 1);
/* Next token: must be a CPP_CHAR16 spelled u'a'.  */
3055 tok
= test
.get_token ();
3056 ASSERT_EQ (tok
->type
, CPP_CHAR16
);
3057 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "u'a'");
/* Next token: must be a CPP_CHAR32 spelled U'a'.  */
3060 tok
= test
.get_token ();
3061 ASSERT_EQ (tok
->type
, CPP_CHAR32
);
3062 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "U'a'");
/* Next token: must be a CPP_WCHAR spelled L'a'.  */
3065 tok
= test
.get_token ();
3066 ASSERT_EQ (tok
->type
, CPP_WCHAR
);
3067 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "L'a'");
3069 /* 'abc' (c-char-sequence): the token is expected to be a CPP_CHAR
   spelled 'abc'.  */
3070 tok
= test
.get_token ();
3071 ASSERT_EQ (tok
->type
, CPP_CHAR
);
3072 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "'abc'");
3074 /* A table of interesting location_t values, giving one axis of our test matrix.  */
3077 static const location_t boundary_locations
[] = {
3078 /* Zero means "don't override the default values for a new line_table". */
3081 /* An arbitrary non-zero value that isn't close to one of
3082 the boundary values below. */
3085 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES: 0x100 below,
   1 below, the limit itself, 1 above and 0x100 above. */
3086 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES
- 0x100,
3087 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES
- 1,
3088 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES
,
3089 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES
+ 1,
3090 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES
+ 0x100,
3092 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS, using the same five
   offsets: -0x100, -1, 0, +1 and +0x100. */
3093 LINE_MAP_MAX_LOCATION_WITH_COLS
- 0x100,
3094 LINE_MAP_MAX_LOCATION_WITH_COLS
- 1,
3095 LINE_MAP_MAX_LOCATION_WITH_COLS
,
3096 LINE_MAP_MAX_LOCATION_WITH_COLS
+ 1,
3097 LINE_MAP_MAX_LOCATION_WITH_COLS
+ 0x100,
3100 /* Run all of the selftests within this file. */
/* These two tests take no line_table_case, so they are called once,
   before the matrix loop below.  */
3105 test_should_have_column_data_p ();
3106 test_unknown_location ();
3109 /* As noted above in the description of struct line_table_case,
3110 we want to explore a test matrix of interesting line_table
3111 situations, running various selftests for each case within the
matrix.  */
3114 /* Run all tests with:
3115 (a) line_table->default_range_bits == 0, and
3116 (b) line_table->default_range_bits == 5. */
3117 int num_cases_tested
= 0;
/* Starting at 0 and stepping by 5 while <= 5 makes this loop visit
   exactly the two values 0 and 5.  */
3118 for (int default_range_bits
= 0; default_range_bits
<= 5;
3119 default_range_bits
+= 5)
3121 /* ...and use each of the "interesting" location values as
3122 the starting location within line_table. */
/* Element count of the boundary_locations table.  */
3123 const int num_boundary_locations
3124 = sizeof (boundary_locations
) / sizeof (boundary_locations
[0]);
3125 for (int loc_idx
= 0; loc_idx
< num_boundary_locations
; loc_idx
++)
/* Build the case for this (range bits, starting location) pair.  */
3127 line_table_case
c (default_range_bits
, boundary_locations
[loc_idx
]);
3129 /* Run all tests for the given case within the test matrix. */
3130 test_accessing_ordinary_linemaps (c
);
3132 test_lexer_string_locations_simple (c
);
3133 test_lexer_string_locations_ebcdic (c
);
3134 test_lexer_string_locations_hex (c
);
3135 test_lexer_string_locations_oct (c
);
3136 test_lexer_string_locations_letter_escape_1 (c
);
3137 test_lexer_string_locations_letter_escape_2 (c
);
3138 test_lexer_string_locations_ucn4 (c
);
3139 test_lexer_string_locations_ucn8 (c
);
3140 test_lexer_string_locations_wide_string (c
);
3141 test_lexer_string_locations_string16 (c
);
3142 test_lexer_string_locations_string32 (c
);
3143 test_lexer_string_locations_u8 (c
);
3144 test_lexer_string_locations_utf8_source (c
);
3145 test_lexer_string_locations_concatenation_1 (c
);
3146 test_lexer_string_locations_concatenation_2 (c
);
3147 test_lexer_string_locations_concatenation_3 (c
);
3148 test_lexer_string_locations_macro (c
);
3149 test_lexer_string_locations_stringified_macro_argument (c
);
3150 test_lexer_string_locations_non_string (c
);
3151 test_lexer_string_locations_long_line (c
);
3152 test_lexer_char_constants (c
);
3158 /* Verify that we fully covered the test matrix: 2 settings of
   default_range_bits times 12 starting locations.  NOTE(review): the
   12 presumably mirrors the number of entries in boundary_locations --
   keep the two in sync.  */
3159 ASSERT_EQ (num_cases_tested
, 2 * 12);
/* This test takes no line_table_case and is called once, after the
   matrix loop.  */
3161 test_reading_source_line ();
3164 } // namespace selftest
3166 #endif /* CHECKING_P */