* config/sparc/sparc.opt (msubxc): New option.
[official-gcc.git] / gcc / input.c
blob61316599cbd25329e6536c2068d4821400b80080
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic-core.h"
25 #include "selftest.h"
26 #include "cpplib.h"
28 #ifndef HAVE_ICONV
29 #define HAVE_ICONV 0
30 #endif
32 /* This is a cache used by get_next_line to store the content of a
33 file to be searched for file lines. */
34 struct fcache
36 /* These are information used to store a line boundary. */
37 struct line_info
39 /* The line number. It starts from 1. */
40 size_t line_num;
42 /* The position (byte count) of the beginning of the line,
43 relative to the file data pointer. This starts at zero. */
44 size_t start_pos;
46 /* The position (byte count) of the last byte of the line. This
47 normally points to the '\n' character, or to one byte after the
48 last byte of the file, if the file doesn't contain a '\n'
49 character. */
50 size_t end_pos;
52 line_info (size_t l, size_t s, size_t e)
53 : line_num (l), start_pos (s), end_pos (e)
56 line_info ()
57 :line_num (0), start_pos (0), end_pos (0)
61 /* The number of time this file has been accessed. This is used
62 to designate which file cache to evict from the cache
63 array. */
64 unsigned use_count;
66 const char *file_path;
68 FILE *fp;
70 /* This points to the content of the file that we've read so
71 far. */
72 char *data;
74 /* The size of the DATA array above.*/
75 size_t size;
77 /* The number of bytes read from the underlying file so far. This
78 must be less (or equal) than SIZE above. */
79 size_t nb_read;
81 /* The index of the beginning of the current line. */
82 size_t line_start_idx;
84 /* The number of the previous line read. This starts at 1. Zero
85 means we've read no line so far. */
86 size_t line_num;
88 /* This is the total number of lines of the current file. At the
89 moment, we try to get this information from the line map
90 subsystem. Note that this is just a hint. When using the C++
91 front-end, this hint is correct because the input file is then
92 completely tokenized before parsing starts; so the line map knows
93 the number of lines before compilation really starts. For e.g,
94 the C front-end, it can happen that we start emitting diagnostics
95 before the line map has seen the end of the file. */
96 size_t total_lines;
98 /* Could this file be missing a trailing newline on its final line?
99 Initially true (to cope with empty files), set to true/false
100 as each line is read. */
101 bool missing_trailing_newline;
103 /* This is a record of the beginning and end of the lines we've seen
104 while reading the file. This is useful to avoid walking the data
105 from the beginning when we are asked to read a line that is
106 before LINE_START_IDX above. Note that the maximum size of this
107 record is fcache_line_record_size, so that the memory consumption
108 doesn't explode. We thus scale total_lines down to
109 fcache_line_record_size. */
110 vec<line_info, va_heap> line_record;
112 fcache ();
113 ~fcache ();
116 /* Current position in real source file. */
118 location_t input_location = UNKNOWN_LOCATION;
120 struct line_maps *line_table;
122 /* A stashed copy of "line_table" for use by selftest::line_table_test.
123 This needs to be a global so that it can be a GC root, and thus
124 prevent the stashed copy from being garbage-collected if the GC runs
125 during a line_table_test. */
127 struct line_maps *saved_line_table;
129 static fcache *fcache_tab;
130 static const size_t fcache_tab_size = 16;
131 static const size_t fcache_buffer_size = 4 * 1024;
132 static const size_t fcache_line_record_size = 100;
134 /* Expand the source location LOC into a human readable location. If
135 LOC resolves to a builtin location, the file name of the readable
136 location is set to the string "<built-in>". If EXPANSION_POINT_P is
137 TRUE and LOC is virtual, then it is resolved to the expansion
138 point of the involved macro. Otherwise, it is resolved to the
139 spelling location of the token.
141 When resolving to the spelling location of the token, if the
142 resulting location is for a built-in location (that is, it has no
143 associated line/column) in the context of a macro expansion, the
144 returned location is the first one (while unwinding the macro
145 location towards its expansion point) that is in real source
146 code. */
148 static expanded_location
149 expand_location_1 (source_location loc,
150 bool expansion_point_p)
152 expanded_location xloc;
153 const line_map_ordinary *map;
154 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
155 tree block = NULL;
157 if (IS_ADHOC_LOC (loc))
159 block = LOCATION_BLOCK (loc);
160 loc = LOCATION_LOCUS (loc);
163 memset (&xloc, 0, sizeof (xloc));
165 if (loc >= RESERVED_LOCATION_COUNT)
167 if (!expansion_point_p)
169 /* We want to resolve LOC to its spelling location.
171 But if that spelling location is a reserved location that
172 appears in the context of a macro expansion (like for a
173 location for a built-in token), let's consider the first
174 location (toward the expansion point) that is not reserved;
175 that is, the first location that is in real source code. */
176 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
177 loc, NULL);
178 lrk = LRK_SPELLING_LOCATION;
180 loc = linemap_resolve_location (line_table, loc,
181 lrk, &map);
182 xloc = linemap_expand_location (line_table, map, loc);
185 xloc.data = block;
186 if (loc <= BUILTINS_LOCATION)
187 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
189 return xloc;
192 /* Initialize the set of cache used for files accessed by caret
193 diagnostic. */
195 static void
196 diagnostic_file_cache_init (void)
198 if (fcache_tab == NULL)
199 fcache_tab = new fcache[fcache_tab_size];
202 /* Free the resources used by the set of cache used for files accessed
203 by caret diagnostic. */
205 void
206 diagnostic_file_cache_fini (void)
208 if (fcache_tab)
210 delete [] (fcache_tab);
211 fcache_tab = NULL;
215 /* Return the total lines number that have been read so far by the
216 line map (in the preprocessor) so far. For languages like C++ that
217 entirely preprocess the input file before starting to parse, this
218 equals the actual number of lines of the file. */
220 static size_t
221 total_lines_num (const char *file_path)
223 size_t r = 0;
224 source_location l = 0;
225 if (linemap_get_file_highest_location (line_table, file_path, &l))
227 gcc_assert (l >= RESERVED_LOCATION_COUNT);
228 expanded_location xloc = expand_location (l);
229 r = xloc.line;
231 return r;
234 /* Lookup the cache used for the content of a given file accessed by
235 caret diagnostic. Return the found cached file, or NULL if no
236 cached file was found. */
238 static fcache*
239 lookup_file_in_cache_tab (const char *file_path)
241 if (file_path == NULL)
242 return NULL;
244 diagnostic_file_cache_init ();
246 /* This will contain the found cached file. */
247 fcache *r = NULL;
248 for (unsigned i = 0; i < fcache_tab_size; ++i)
250 fcache *c = &fcache_tab[i];
251 if (c->file_path && !strcmp (c->file_path, file_path))
253 ++c->use_count;
254 r = c;
258 if (r)
259 ++r->use_count;
261 return r;
264 /* Purge any mention of FILENAME from the cache of files used for
265 printing source code. For use in selftests when working
266 with tempfiles. */
268 void
269 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
271 gcc_assert (file_path);
273 fcache *r = lookup_file_in_cache_tab (file_path);
274 if (!r)
275 /* Not found. */
276 return;
278 r->file_path = NULL;
279 if (r->fp)
280 fclose (r->fp);
281 r->fp = NULL;
282 r->nb_read = 0;
283 r->line_start_idx = 0;
284 r->line_num = 0;
285 r->line_record.truncate (0);
286 r->use_count = 0;
287 r->total_lines = 0;
288 r->missing_trailing_newline = true;
291 /* Return the file cache that has been less used, recently, or the
292 first empty one. If HIGHEST_USE_COUNT is non-null,
293 *HIGHEST_USE_COUNT is set to the highest use count of the entries
294 in the cache table. */
296 static fcache*
297 evicted_cache_tab_entry (unsigned *highest_use_count)
299 diagnostic_file_cache_init ();
301 fcache *to_evict = &fcache_tab[0];
302 unsigned huc = to_evict->use_count;
303 for (unsigned i = 1; i < fcache_tab_size; ++i)
305 fcache *c = &fcache_tab[i];
306 bool c_is_empty = (c->file_path == NULL);
308 if (c->use_count < to_evict->use_count
309 || (to_evict->file_path && c_is_empty))
310 /* We evict C because it's either an entry with a lower use
311 count or one that is empty. */
312 to_evict = c;
314 if (huc < c->use_count)
315 huc = c->use_count;
317 if (c_is_empty)
318 /* We've reached the end of the cache; subsequent elements are
319 all empty. */
320 break;
323 if (highest_use_count)
324 *highest_use_count = huc;
326 return to_evict;
329 /* Create the cache used for the content of a given file to be
330 accessed by caret diagnostic. This cache is added to an array of
331 cache and can be retrieved by lookup_file_in_cache_tab. This
332 function returns the created cache. Note that only the last
333 fcache_tab_size files are cached. */
335 static fcache*
336 add_file_to_cache_tab (const char *file_path)
339 FILE *fp = fopen (file_path, "r");
340 if (fp == NULL)
341 return NULL;
343 unsigned highest_use_count = 0;
344 fcache *r = evicted_cache_tab_entry (&highest_use_count);
345 r->file_path = file_path;
346 if (r->fp)
347 fclose (r->fp);
348 r->fp = fp;
349 r->nb_read = 0;
350 r->line_start_idx = 0;
351 r->line_num = 0;
352 r->line_record.truncate (0);
353 /* Ensure that this cache entry doesn't get evicted next time
354 add_file_to_cache_tab is called. */
355 r->use_count = ++highest_use_count;
356 r->total_lines = total_lines_num (file_path);
357 r->missing_trailing_newline = true;
359 return r;
362 /* Lookup the cache used for the content of a given file accessed by
363 caret diagnostic. If no cached file was found, create a new cache
364 for this file, add it to the array of cached file and return
365 it. */
367 static fcache*
368 lookup_or_add_file_to_cache_tab (const char *file_path)
370 fcache *r = lookup_file_in_cache_tab (file_path);
371 if (r == NULL)
372 r = add_file_to_cache_tab (file_path);
373 return r;
376 /* Default constructor for a cache of file used by caret
377 diagnostic. */
379 fcache::fcache ()
380 : use_count (0), file_path (NULL), fp (NULL), data (0),
381 size (0), nb_read (0), line_start_idx (0), line_num (0),
382 total_lines (0), missing_trailing_newline (true)
384 line_record.create (0);
387 /* Destructor for a cache of file used by caret diagnostic. */
389 fcache::~fcache ()
391 if (fp)
393 fclose (fp);
394 fp = NULL;
396 if (data)
398 XDELETEVEC (data);
399 data = 0;
401 line_record.release ();
404 /* Returns TRUE iff the cache would need to be filled with data coming
405 from the file. That is, either the cache is empty or full or the
406 current line is empty. Note that if the cache is full, it would
407 need to be extended and filled again. */
409 static bool
410 needs_read (fcache *c)
412 return (c->nb_read == 0
413 || c->nb_read == c->size
414 || (c->line_start_idx >= c->nb_read - 1));
417 /* Return TRUE iff the cache is full and thus needs to be
418 extended. */
420 static bool
421 needs_grow (fcache *c)
423 return c->nb_read == c->size;
426 /* Grow the cache if it needs to be extended. */
428 static void
429 maybe_grow (fcache *c)
431 if (!needs_grow (c))
432 return;
434 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
435 c->data = XRESIZEVEC (char, c->data, size);
436 c->size = size;
439 /* Read more data into the cache. Extends the cache if need be.
440 Returns TRUE iff new data could be read. */
442 static bool
443 read_data (fcache *c)
445 if (feof (c->fp) || ferror (c->fp))
446 return false;
448 maybe_grow (c);
450 char * from = c->data + c->nb_read;
451 size_t to_read = c->size - c->nb_read;
452 size_t nb_read = fread (from, 1, to_read, c->fp);
454 if (ferror (c->fp))
455 return false;
457 c->nb_read += nb_read;
458 return !!nb_read;
461 /* Read new data iff the cache needs to be filled with more data
462 coming from the file FP. Return TRUE iff the cache was filled with
463 mode data. */
465 static bool
466 maybe_read_data (fcache *c)
468 if (!needs_read (c))
469 return false;
470 return read_data (c);
473 /* Read a new line from file FP, using C as a cache for the data
474 coming from the file. Upon successful completion, *LINE is set to
475 the beginning of the line found. *LINE points directly in the
476 line cache and is only valid until the next call of get_next_line.
477 *LINE_LEN is set to the length of the line. Note that the line
478 does not contain any terminal delimiter. This function returns
479 true if some data was read or process from the cache, false
480 otherwise. Note that subsequent calls to get_next_line might
481 make the content of *LINE invalid. */
483 static bool
484 get_next_line (fcache *c, char **line, ssize_t *line_len)
486 /* Fill the cache with data to process. */
487 maybe_read_data (c);
489 size_t remaining_size = c->nb_read - c->line_start_idx;
490 if (remaining_size == 0)
491 /* There is no more data to process. */
492 return false;
494 char *line_start = c->data + c->line_start_idx;
496 char *next_line_start = NULL;
497 size_t len = 0;
498 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
499 if (line_end == NULL)
501 /* We haven't found the end-of-line delimiter in the cache.
502 Fill the cache with more data from the file and look for the
503 '\n'. */
504 while (maybe_read_data (c))
506 line_start = c->data + c->line_start_idx;
507 remaining_size = c->nb_read - c->line_start_idx;
508 line_end = (char *) memchr (line_start, '\n', remaining_size);
509 if (line_end != NULL)
511 next_line_start = line_end + 1;
512 break;
515 if (line_end == NULL)
517 /* We've loadded all the file into the cache and still no
518 '\n'. Let's say the line ends up at one byte passed the
519 end of the file. This is to stay consistent with the case
520 of when the line ends up with a '\n' and line_end points to
521 that terminal '\n'. That consistency is useful below in
522 the len calculation. */
523 line_end = c->data + c->nb_read ;
524 c->missing_trailing_newline = true;
526 else
527 c->missing_trailing_newline = false;
529 else
531 next_line_start = line_end + 1;
532 c->missing_trailing_newline = false;
535 if (ferror (c->fp))
536 return false;
538 /* At this point, we've found the end of the of line. It either
539 points to the '\n' or to one byte after the last byte of the
540 file. */
541 gcc_assert (line_end != NULL);
543 len = line_end - line_start;
545 if (c->line_start_idx < c->nb_read)
546 *line = line_start;
548 ++c->line_num;
550 /* Before we update our line record, make sure the hint about the
551 total number of lines of the file is correct. If it's not, then
552 we give up recording line boundaries from now on. */
553 bool update_line_record = true;
554 if (c->line_num > c->total_lines)
555 update_line_record = false;
557 /* Now update our line record so that re-reading lines from the
558 before c->line_start_idx is faster. */
559 if (update_line_record
560 && c->line_record.length () < fcache_line_record_size)
562 /* If the file lines fits in the line record, we just record all
563 its lines ...*/
564 if (c->total_lines <= fcache_line_record_size
565 && c->line_num > c->line_record.length ())
566 c->line_record.safe_push (fcache::line_info (c->line_num,
567 c->line_start_idx,
568 line_end - c->data));
569 else if (c->total_lines > fcache_line_record_size)
571 /* ... otherwise, we just scale total_lines down to
572 (fcache_line_record_size lines. */
573 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
574 if (c->line_record.length () == 0
575 || n >= c->line_record.length ())
576 c->line_record.safe_push (fcache::line_info (c->line_num,
577 c->line_start_idx,
578 line_end - c->data));
582 /* Update c->line_start_idx so that it points to the next line to be
583 read. */
584 if (next_line_start)
585 c->line_start_idx = next_line_start - c->data;
586 else
587 /* We didn't find any terminal '\n'. Let's consider that the end
588 of line is the end of the data in the cache. The next
589 invocation of get_next_line will either read more data from the
590 underlying file or return false early because we've reached the
591 end of the file. */
592 c->line_start_idx = c->nb_read;
594 *line_len = len;
596 return true;
599 /* Consume the next bytes coming from the cache (or from its
600 underlying file if there are remaining unread bytes in the file)
601 until we reach the next end-of-line (or end-of-file). There is no
602 copying from the cache involved. Return TRUE upon successful
603 completion. */
605 static bool
606 goto_next_line (fcache *cache)
608 char *l;
609 ssize_t len;
611 return get_next_line (cache, &l, &len);
614 /* Read an arbitrary line number LINE_NUM from the file cached in C.
615 If the line was read successfully, *LINE points to the beginning
616 of the line in the file cache and *LINE_LEN is the length of the
617 line. *LINE is not nul-terminated, but may contain zero bytes.
618 *LINE is only valid until the next call of read_line_num.
619 This function returns bool if a line was read. */
621 static bool
622 read_line_num (fcache *c, size_t line_num,
623 char **line, ssize_t *line_len)
625 gcc_assert (line_num > 0);
627 if (line_num <= c->line_num)
629 /* We've been asked to read lines that are before c->line_num.
630 So lets use our line record (if it's not empty) to try to
631 avoid re-reading the file from the beginning again. */
633 if (c->line_record.is_empty ())
635 c->line_start_idx = 0;
636 c->line_num = 0;
638 else
640 fcache::line_info *i = NULL;
641 if (c->total_lines <= fcache_line_record_size)
643 /* In languages where the input file is not totally
644 preprocessed up front, the c->total_lines hint
645 can be smaller than the number of lines of the
646 file. In that case, only the first
647 c->total_lines have been recorded.
649 Otherwise, the first c->total_lines we've read have
650 their start/end recorded here. */
651 i = (line_num <= c->total_lines)
652 ? &c->line_record[line_num - 1]
653 : &c->line_record[c->total_lines - 1];
654 gcc_assert (i->line_num <= line_num);
656 else
658 /* So the file had more lines than our line record
659 size. Thus the number of lines we've recorded has
660 been scaled down to fcache_line_reacord_size. Let's
661 pick the start/end of the recorded line that is
662 closest to line_num. */
663 size_t n = (line_num <= c->total_lines)
664 ? line_num * fcache_line_record_size / c->total_lines
665 : c ->line_record.length () - 1;
666 if (n < c->line_record.length ())
668 i = &c->line_record[n];
669 gcc_assert (i->line_num <= line_num);
673 if (i && i->line_num == line_num)
675 /* We have the start/end of the line. */
676 *line = c->data + i->start_pos;
677 *line_len = i->end_pos - i->start_pos;
678 return true;
681 if (i)
683 c->line_start_idx = i->start_pos;
684 c->line_num = i->line_num - 1;
686 else
688 c->line_start_idx = 0;
689 c->line_num = 0;
694 /* Let's walk from line c->line_num up to line_num - 1, without
695 copying any line. */
696 while (c->line_num < line_num - 1)
697 if (!goto_next_line (c))
698 return false;
700 /* The line we want is the next one. Let's read and copy it back to
701 the caller. */
702 return get_next_line (c, line, line_len);
705 /* Return the physical source line that corresponds to FILE_PATH/LINE.
706 The line is not nul-terminated. The returned pointer is only
707 valid until the next call of location_get_source_line.
708 Note that the line can contain several null characters,
709 so LINE_LEN, if non-null, points to the actual length of the line.
710 If the function fails, NULL is returned. */
712 const char *
713 location_get_source_line (const char *file_path, int line,
714 int *line_len)
716 char *buffer = NULL;
717 ssize_t len;
719 if (line == 0)
720 return NULL;
722 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
723 if (c == NULL)
724 return NULL;
726 bool read = read_line_num (c, line, &buffer, &len);
728 if (read && line_len)
729 *line_len = len;
731 return read ? buffer : NULL;
734 /* Determine if FILE_PATH missing a trailing newline on its final line.
735 Only valid to call once all of the file has been loaded, by
736 requesting a line number beyond the end of the file. */
738 bool
739 location_missing_trailing_newline (const char *file_path)
741 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
742 if (c == NULL)
743 return false;
745 return c->missing_trailing_newline;
748 /* Test if the location originates from the spelling location of a
749 builtin-tokens. That is, return TRUE if LOC is a (possibly
750 virtual) location of a built-in token that appears in the expansion
751 list of a macro. Please note that this function also works on
752 tokens that result from built-in tokens. For instance, the
753 function would return true if passed a token "4" that is the result
754 of the expansion of the built-in __LINE__ macro. */
755 bool
756 is_location_from_builtin_token (source_location loc)
758 const line_map_ordinary *map = NULL;
759 loc = linemap_resolve_location (line_table, loc,
760 LRK_SPELLING_LOCATION, &map);
761 return loc == BUILTINS_LOCATION;
764 /* Expand the source location LOC into a human readable location. If
765 LOC is virtual, it resolves to the expansion point of the involved
766 macro. If LOC resolves to a builtin location, the file name of the
767 readable location is set to the string "<built-in>". */
769 expanded_location
770 expand_location (source_location loc)
772 return expand_location_1 (loc, /*expansion_point_p=*/true);
775 /* Expand the source location LOC into a human readable location. If
776 LOC is virtual, it resolves to the expansion location of the
777 relevant macro. If LOC resolves to a builtin location, the file
778 name of the readable location is set to the string
779 "<built-in>". */
781 expanded_location
782 expand_location_to_spelling_point (source_location loc)
784 return expand_location_1 (loc, /*expansion_point_p=*/false);
787 /* The rich_location class within libcpp requires a way to expand
788 source_location instances, and relies on the client code
789 providing a symbol named
790 linemap_client_expand_location_to_spelling_point
791 to do this.
793 This is the implementation for libcommon.a (all host binaries),
794 which simply calls into expand_location_to_spelling_point. */
796 expanded_location
797 linemap_client_expand_location_to_spelling_point (source_location loc)
799 return expand_location_to_spelling_point (loc);
803 /* If LOCATION is in a system header and if it is a virtual location for
804 a token coming from the expansion of a macro, unwind it to the
805 location of the expansion point of the macro. Otherwise, just return
806 LOCATION.
808 This is used for instance when we want to emit diagnostics about a
809 token that may be located in a macro that is itself defined in a
810 system header, for example, for the NULL macro. In such a case, if
811 LOCATION were passed directly to diagnostic functions such as
812 warning_at, the diagnostic would be suppressed (unless
813 -Wsystem-headers). */
815 source_location
816 expansion_point_location_if_in_system_header (source_location location)
818 if (in_system_header_at (location))
819 location = linemap_resolve_location (line_table, location,
820 LRK_MACRO_EXPANSION_POINT,
821 NULL);
822 return location;
825 /* If LOCATION is a virtual location for a token coming from the expansion
826 of a macro, unwind to the location of the expansion point of the macro. */
828 source_location
829 expansion_point_location (source_location location)
831 return linemap_resolve_location (line_table, location,
832 LRK_MACRO_EXPANSION_POINT, NULL);
835 /* Construct a location with caret at CARET, ranging from START to
836 finish e.g.
838 11111111112
839 12345678901234567890
841 523 return foo + bar;
842 ~~~~^~~~~
845 The location's caret is at the "+", line 523 column 15, but starts
846 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
847 of "bar" at column 19. */
849 location_t
850 make_location (location_t caret, location_t start, location_t finish)
852 location_t pure_loc = get_pure_location (caret);
853 source_range src_range;
854 src_range.m_start = get_start (start);
855 src_range.m_finish = get_finish (finish);
856 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
857 pure_loc,
858 src_range,
859 NULL);
860 return combined_loc;
#define ONE_K 1024
#define ONE_M (ONE_K * ONE_K)

/* Scale X for display, as an unsigned long:
   - X itself, if X is strictly lower than 10 K (in base 2);
   - X divided by 1024, if X is at least 10 K but strictly lower than
     10 M (in base 2);
   - X divided by 1024 * 1024, if X is at least 10 M (in base 2).  */
#define SCALE(x) ((unsigned long) ((x) < 10 * ONE_K \
                                   ? (x) \
                                   : ((x) < 10 * ONE_M \
                                      ? (x) / ONE_K \
                                      : (x) / ONE_M)))

/* The unit character that goes with SCALE (X):
   - ' ' if X is strictly lower than 10 K (in base 2);
   - 'k' if X is at least 10 K but strictly lower than 10 M (in base 2);
   - 'M' if X is at least 10 M (in base 2).  */
#define STAT_LABEL(x) ((x) < 10 * ONE_K ? ' ' : ((x) < 10 * ONE_M ? 'k' : 'M'))

/* Expand to the two arguments needed to display SIZE as a multiple of
   1K or 1M (in base 2): the scaled amount, followed by its unit
   character (' ', 'k' or 'M').  */
#define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
888 /* Dump statistics to stderr about the memory usage of the line_table
889 set of line maps. This also displays some statistics about macro
890 expansion. */
892 void
893 dump_line_table_statistics (void)
895 struct linemap_stats s;
896 long total_used_map_size,
897 macro_maps_size,
898 total_allocated_map_size;
900 memset (&s, 0, sizeof (s));
902 linemap_get_statistics (line_table, &s);
904 macro_maps_size = s.macro_maps_used_size
905 + s.macro_maps_locations_size;
907 total_allocated_map_size = s.ordinary_maps_allocated_size
908 + s.macro_maps_allocated_size
909 + s.macro_maps_locations_size;
911 total_used_map_size = s.ordinary_maps_used_size
912 + s.macro_maps_used_size
913 + s.macro_maps_locations_size;
915 fprintf (stderr, "Number of expanded macros: %5ld\n",
916 s.num_expanded_macros);
917 if (s.num_expanded_macros != 0)
918 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
919 s.num_macro_tokens / s.num_expanded_macros);
920 fprintf (stderr,
921 "\nLine Table allocations during the "
922 "compilation process\n");
923 fprintf (stderr, "Number of ordinary maps used: %5ld%c\n",
924 SCALE (s.num_ordinary_maps_used),
925 STAT_LABEL (s.num_ordinary_maps_used));
926 fprintf (stderr, "Ordinary map used size: %5ld%c\n",
927 SCALE (s.ordinary_maps_used_size),
928 STAT_LABEL (s.ordinary_maps_used_size));
929 fprintf (stderr, "Number of ordinary maps allocated: %5ld%c\n",
930 SCALE (s.num_ordinary_maps_allocated),
931 STAT_LABEL (s.num_ordinary_maps_allocated));
932 fprintf (stderr, "Ordinary maps allocated size: %5ld%c\n",
933 SCALE (s.ordinary_maps_allocated_size),
934 STAT_LABEL (s.ordinary_maps_allocated_size));
935 fprintf (stderr, "Number of macro maps used: %5ld%c\n",
936 SCALE (s.num_macro_maps_used),
937 STAT_LABEL (s.num_macro_maps_used));
938 fprintf (stderr, "Macro maps used size: %5ld%c\n",
939 SCALE (s.macro_maps_used_size),
940 STAT_LABEL (s.macro_maps_used_size));
941 fprintf (stderr, "Macro maps locations size: %5ld%c\n",
942 SCALE (s.macro_maps_locations_size),
943 STAT_LABEL (s.macro_maps_locations_size));
944 fprintf (stderr, "Macro maps size: %5ld%c\n",
945 SCALE (macro_maps_size),
946 STAT_LABEL (macro_maps_size));
947 fprintf (stderr, "Duplicated maps locations size: %5ld%c\n",
948 SCALE (s.duplicated_macro_maps_locations_size),
949 STAT_LABEL (s.duplicated_macro_maps_locations_size));
950 fprintf (stderr, "Total allocated maps size: %5ld%c\n",
951 SCALE (total_allocated_map_size),
952 STAT_LABEL (total_allocated_map_size));
953 fprintf (stderr, "Total used maps size: %5ld%c\n",
954 SCALE (total_used_map_size),
955 STAT_LABEL (total_used_map_size));
956 fprintf (stderr, "Ad-hoc table size: %5ld%c\n",
957 SCALE (s.adhoc_table_size),
958 STAT_LABEL (s.adhoc_table_size));
959 fprintf (stderr, "Ad-hoc table entries used: %5ld\n",
960 s.adhoc_table_entries_used);
961 fprintf (stderr, "optimized_ranges: %i\n",
962 line_table->num_optimized_ranges);
963 fprintf (stderr, "unoptimized_ranges: %i\n",
964 line_table->num_unoptimized_ranges);
966 fprintf (stderr, "\n");
969 /* Get location one beyond the final location in ordinary map IDX. */
971 static source_location
972 get_end_location (struct line_maps *set, unsigned int idx)
974 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
975 return set->highest_location;
977 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
978 return MAP_START_LOCATION (next_map);
/* Helper function for write_digit_row.  Write to STREAM the character
   for the last decimal digit of DIGIT.  */

static void
write_digit (FILE *stream, int digit)
{
  const int glyph = '0' + (digit % 10);
  fputc (glyph, stream);
}
989 /* Helper function for dump_location_info.
990 Write a row of numbers to STREAM, numbering a source line,
991 giving the units, tens, hundreds etc of the column number. */
993 static void
994 write_digit_row (FILE *stream, int indent,
995 const line_map_ordinary *map,
996 source_location loc, int max_col, int divisor)
998 fprintf (stream, "%*c", indent, ' ');
999 fprintf (stream, "|");
1000 for (int column = 1; column < max_col; column++)
1002 source_location column_loc = loc + (column << map->m_range_bits);
1003 write_digit (stream, column_loc / divisor);
1005 fprintf (stream, "\n");
1008 /* Write a half-closed (START) / half-open (END) interval of
1009 source_location to STREAM. */
1011 static void
1012 dump_location_range (FILE *stream,
1013 source_location start, source_location end)
1015 fprintf (stream,
1016 " source_location interval: %u <= loc < %u\n",
1017 start, end);
1020 /* Write a labelled description of a half-closed (START) / half-open (END)
1021 interval of source_location to STREAM. */
1023 static void
1024 dump_labelled_location_range (FILE *stream,
1025 const char *name,
1026 source_location start, source_location end)
1028 fprintf (stream, "%s\n", name);
1029 dump_location_range (stream, start, end);
1030 fprintf (stream, "\n");
1033 /* Write a visualization of the locations in the line_table to STREAM. */
1035 void
1036 dump_location_info (FILE *stream)
1038 /* Visualize the reserved locations. */
1039 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1040 0, RESERVED_LOCATION_COUNT);
1042 /* Visualize the ordinary line_map instances, rendering the sources. */
1043 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1045 source_location end_location = get_end_location (line_table, idx);
1046 /* half-closed: doesn't include this one. */
1048 const line_map_ordinary *map
1049 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1050 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1051 dump_location_range (stream,
1052 MAP_START_LOCATION (map), end_location);
1053 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1054 fprintf (stream, " starting at line: %i\n",
1055 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1056 fprintf (stream, " column and range bits: %i\n",
1057 map->m_column_and_range_bits);
1058 fprintf (stream, " column bits: %i\n",
1059 map->m_column_and_range_bits - map->m_range_bits);
1060 fprintf (stream, " range bits: %i\n",
1061 map->m_range_bits);
1063 /* Render the span of source lines that this "map" covers. */
1064 for (source_location loc = MAP_START_LOCATION (map);
1065 loc < end_location;
1066 loc += (1 << map->m_range_bits) )
1068 gcc_assert (pure_location_p (line_table, loc) );
1070 expanded_location exploc
1071 = linemap_expand_location (line_table, map, loc);
1073 if (0 == exploc.column)
1075 /* Beginning of a new source line: draw the line. */
1077 int line_size;
1078 const char *line_text = location_get_source_line (exploc.file,
1079 exploc.line,
1080 &line_size);
1081 if (!line_text)
1082 break;
1083 fprintf (stream,
1084 "%s:%3i|loc:%5i|%.*s\n",
1085 exploc.file, exploc.line,
1086 loc,
1087 line_size, line_text);
1089 /* "loc" is at column 0, which means "the whole line".
1090 Render the locations *within* the line, by underlining
1091 it, showing the source_location numeric values
1092 at each column. */
1093 int max_col = (1 << map->m_column_and_range_bits) - 1;
1094 if (max_col > line_size)
1095 max_col = line_size + 1;
1097 int indent = 14 + strlen (exploc.file);
1099 /* Thousands. */
1100 if (end_location > 999)
1101 write_digit_row (stream, indent, map, loc, max_col, 1000);
1103 /* Hundreds. */
1104 if (end_location > 99)
1105 write_digit_row (stream, indent, map, loc, max_col, 100);
1107 /* Tens. */
1108 write_digit_row (stream, indent, map, loc, max_col, 10);
1110 /* Units. */
1111 write_digit_row (stream, indent, map, loc, max_col, 1);
1114 fprintf (stream, "\n");
1117 /* Visualize unallocated values. */
1118 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1119 line_table->highest_location,
1120 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1122 /* Visualize the macro line_map instances, rendering the sources. */
1123 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1125 /* Each macro map that is allocated owns source_location values
1126 that are *lower* that the one before them.
1127 Hence it's meaningful to view them either in order of ascending
1128 source locations, or in order of ascending macro map index. */
1129 const bool ascending_source_locations = true;
1130 unsigned int idx = (ascending_source_locations
1131 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1132 : i);
1133 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1134 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1135 idx,
1136 linemap_map_get_macro_name (map),
1137 MACRO_MAP_NUM_MACRO_TOKENS (map));
1138 dump_location_range (stream,
1139 map->start_location,
1140 (map->start_location
1141 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1142 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1143 "expansion point is location %i",
1144 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1145 fprintf (stream, " map->start_location: %u\n",
1146 map->start_location);
1148 fprintf (stream, " macro_locations:\n");
1149 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1151 source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1152 source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1154 /* linemap_add_macro_token encodes token numbers in an expansion
1155 by putting them after MAP_START_LOCATION. */
1157 /* I'm typically seeing 4 uninitialized entries at the end of
1158 0xafafafaf.
1159 This appears to be due to macro.c:replace_args
1160 adding 2 extra args for padding tokens; presumably there may
1161 be a leading and/or trailing padding token injected,
1162 each for 2 more location slots.
1163 This would explain there being up to 4 source_locations slots
1164 that may be uninitialized. */
1166 fprintf (stream, " %u: %u, %u\n",
1170 if (x == y)
1172 if (x < MAP_START_LOCATION (map))
1173 inform (x, "token %u has x-location == y-location == %u", i, x);
1174 else
1175 fprintf (stream,
1176 "x-location == y-location == %u encodes token # %u\n",
1177 x, x - MAP_START_LOCATION (map));
1179 else
1181 inform (x, "token %u has x-location == %u", i, x);
1182 inform (x, "token %u has y-location == %u", i, y);
1185 fprintf (stream, "\n");
1188 /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1189 macro map, presumably due to an off-by-one error somewhere
1190 between the logic in linemap_enter_macro and
1191 LINEMAPS_MACRO_LOWEST_LOCATION. */
1192 dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1193 MAX_SOURCE_LOCATION,
1194 MAX_SOURCE_LOCATION + 1);
1196 /* Visualize ad-hoc values. */
1197 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1198 MAX_SOURCE_LOCATION + 1, UINT_MAX);
1201 /* string_concat's constructor. */
1203 string_concat::string_concat (int num, location_t *locs)
1204 : m_num (num)
1206 m_locs = ggc_vec_alloc <location_t> (num);
1207 for (int i = 0; i < num; i++)
1208 m_locs[i] = locs[i];
1211 /* string_concat_db's constructor. */
1213 string_concat_db::string_concat_db ()
1215 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1218 /* Record that a string concatenation occurred, covering NUM
1219 string literal tokens. LOCS is an array of size NUM, containing the
1220 locations of the tokens. A copy of LOCS is taken. */
1222 void
1223 string_concat_db::record_string_concatenation (int num, location_t *locs)
1225 gcc_assert (num > 1);
1226 gcc_assert (locs);
1228 location_t key_loc = get_key_loc (locs[0]);
1230 string_concat *concat
1231 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1232 m_table->put (key_loc, concat);
1235 /* Determine if LOC was the location of the the initial token of a
1236 concatenation of string literal tokens.
1237 If so, *OUT_NUM is written to with the number of tokens, and
1238 *OUT_LOCS with the location of an array of locations of the
1239 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1240 storage owned by the string_concat_db.
1241 Otherwise, return false. */
1243 bool
1244 string_concat_db::get_string_concatenation (location_t loc,
1245 int *out_num,
1246 location_t **out_locs)
1248 gcc_assert (out_num);
1249 gcc_assert (out_locs);
1251 location_t key_loc = get_key_loc (loc);
1253 string_concat **concat = m_table->get (key_loc);
1254 if (!concat)
1255 return false;
1257 *out_num = (*concat)->m_num;
1258 *out_locs =(*concat)->m_locs;
1259 return true;
1262 /* Internal function. Canonicalize LOC into a form suitable for
1263 use as a key within the database, stripping away macro expansion,
1264 ad-hoc information, and range information, using the location of
1265 the start of LOC within an ordinary linemap. */
1267 location_t
1268 string_concat_db::get_key_loc (location_t loc)
1270 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1271 NULL);
1273 loc = get_range_from_loc (line_table, loc).m_start;
1275 return loc;
1278 /* Helper class for use within get_substring_ranges_for_loc.
1279 An vec of cpp_string with responsibility for releasing all of the
1280 str->text for each str in the vector. */
1282 class auto_cpp_string_vec : public auto_vec <cpp_string>
1284 public:
1285 auto_cpp_string_vec (int alloc)
1286 : auto_vec <cpp_string> (alloc) {}
1288 ~auto_cpp_string_vec ()
1290 /* Clean up the copies within this vec. */
1291 int i;
1292 cpp_string *str;
1293 FOR_EACH_VEC_ELT (*this, i, str)
1294 free (const_cast <unsigned char *> (str->text));
1298 /* Attempt to populate RANGES with source location information on the
1299 individual characters within the string literal found at STRLOC.
1300 If CONCATS is non-NULL, then any string literals that the token at
1301 STRLOC was concatenated with are also added to RANGES.
1303 Return NULL if successful, or an error message if any errors occurred (in
1304 which case RANGES may be only partially populated and should not
1305 be used).
1307 This is implemented by re-parsing the relevant source line(s). */
1309 static const char *
1310 get_substring_ranges_for_loc (cpp_reader *pfile,
1311 string_concat_db *concats,
1312 location_t strloc,
1313 enum cpp_ttype type,
1314 cpp_substring_ranges &ranges)
1316 gcc_assert (pfile);
1318 if (strloc == UNKNOWN_LOCATION)
1319 return "unknown location";
1321 /* If string concatenation has occurred at STRLOC, get the locations
1322 of all of the literal tokens making up the compound string.
1323 Otherwise, just use STRLOC. */
1324 int num_locs = 1;
1325 location_t *strlocs = &strloc;
1326 if (concats)
1327 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1329 auto_cpp_string_vec strs (num_locs);
1330 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1331 for (int i = 0; i < num_locs; i++)
1333 /* Get range of strloc. We will use it to locate the start and finish
1334 of the literal token within the line. */
1335 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1337 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1338 /* If the string is within a macro expansion, we can't get at the
1339 end location. */
1340 return "macro expansion";
1342 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1343 /* If so, we can't reliably determine where the token started within
1344 its line. */
1345 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1347 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1348 /* If so, we can't reliably determine where the token finished within
1349 its line. */
1350 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1352 expanded_location start
1353 = expand_location_to_spelling_point (src_range.m_start);
1354 expanded_location finish
1355 = expand_location_to_spelling_point (src_range.m_finish);
1356 if (start.file != finish.file)
1357 return "range endpoints are in different files";
1358 if (start.line != finish.line)
1359 return "range endpoints are on different lines";
1360 if (start.column > finish.column)
1361 return "range endpoints are reversed";
1363 int line_width;
1364 const char *line = location_get_source_line (start.file, start.line,
1365 &line_width);
1366 if (line == NULL)
1367 return "unable to read source line";
1369 /* Determine the location of the literal (including quotes
1370 and leading prefix chars, such as the 'u' in a u""
1371 token). */
1372 const char *literal = line + start.column - 1;
1373 int literal_length = finish.column - start.column + 1;
1375 gcc_assert (line_width >= (start.column - 1 + literal_length));
1376 cpp_string from;
1377 from.len = literal_length;
1378 /* Make a copy of the literal, to avoid having to rely on
1379 the lifetime of the copy of the line within the cache.
1380 This will be released by the auto_cpp_string_vec dtor. */
1381 from.text = XDUPVEC (unsigned char, literal, literal_length);
1382 strs.safe_push (from);
1384 /* For very long lines, a new linemap could have started
1385 halfway through the token.
1386 Ensure that the loc_reader uses the linemap of the
1387 *end* of the token for its start location. */
1388 const line_map_ordinary *final_ord_map;
1389 linemap_resolve_location (line_table, src_range.m_finish,
1390 LRK_MACRO_EXPANSION_POINT, &final_ord_map);
1391 location_t start_loc
1392 = linemap_position_for_line_and_column (line_table, final_ord_map,
1393 start.line, start.column);
1395 cpp_string_location_reader loc_reader (start_loc, line_table);
1396 loc_readers.safe_push (loc_reader);
1399 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1400 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1401 loc_readers.address (),
1402 num_locs, &ranges, type);
1403 if (err)
1404 return err;
1406 /* Success: "ranges" should now contain information on the string. */
1407 return NULL;
1410 /* Attempt to populate *OUT_LOC with source location information on the
1411 given characters within the string literal found at STRLOC.
1412 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1413 character set.
1415 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1416 and string literal "012345\n789"
1417 *OUT_LOC is written to with:
1418 "012345\n789"
1419 ~^~~~~
1421 If CONCATS is non-NULL, then any string literals that the token at
1422 STRLOC was concatenated with are also considered.
1424 This is implemented by re-parsing the relevant source line(s).
1426 Return NULL if successful, or an error message if any errors occurred.
1427 Error messages are intended for GCC developers (to help debugging) rather
1428 than for end-users. */
1430 const char *
1431 get_source_location_for_substring (cpp_reader *pfile,
1432 string_concat_db *concats,
1433 location_t strloc,
1434 enum cpp_ttype type,
1435 int caret_idx, int start_idx, int end_idx,
1436 source_location *out_loc)
1438 gcc_checking_assert (caret_idx >= 0);
1439 gcc_checking_assert (start_idx >= 0);
1440 gcc_checking_assert (end_idx >= 0);
1441 gcc_assert (out_loc);
1443 cpp_substring_ranges ranges;
1444 const char *err
1445 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1446 if (err)
1447 return err;
1449 if (caret_idx >= ranges.get_num_ranges ())
1450 return "caret_idx out of range";
1451 if (start_idx >= ranges.get_num_ranges ())
1452 return "start_idx out of range";
1453 if (end_idx >= ranges.get_num_ranges ())
1454 return "end_idx out of range";
1456 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1457 ranges.get_range (start_idx).m_start,
1458 ranges.get_range (end_idx).m_finish);
1459 return NULL;
1462 #if CHECKING_P
1464 namespace selftest {
1466 /* Selftests of location handling. */
1468 /* Attempt to populate *OUT_RANGE with source location information on the
1469 given character within the string literal found at STRLOC.
1470 CHAR_IDX refers to an offset within the execution character set.
1471 If CONCATS is non-NULL, then any string literals that the token at
1472 STRLOC was concatenated with are also considered.
1474 This is implemented by re-parsing the relevant source line(s).
1476 Return NULL if successful, or an error message if any errors occurred.
1477 Error messages are intended for GCC developers (to help debugging) rather
1478 than for end-users. */
1480 static const char *
1481 get_source_range_for_char (cpp_reader *pfile,
1482 string_concat_db *concats,
1483 location_t strloc,
1484 enum cpp_ttype type,
1485 int char_idx,
1486 source_range *out_range)
1488 gcc_checking_assert (char_idx >= 0);
1489 gcc_assert (out_range);
1491 cpp_substring_ranges ranges;
1492 const char *err
1493 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1494 if (err)
1495 return err;
1497 if (char_idx >= ranges.get_num_ranges ())
1498 return "char_idx out of range";
1500 *out_range = ranges.get_range (char_idx);
1501 return NULL;
1504 /* As get_source_range_for_char, but write to *OUT the number
1505 of ranges that are available. */
1507 static const char *
1508 get_num_source_ranges_for_substring (cpp_reader *pfile,
1509 string_concat_db *concats,
1510 location_t strloc,
1511 enum cpp_ttype type,
1512 int *out)
1514 gcc_assert (out);
1516 cpp_substring_ranges ranges;
1517 const char *err
1518 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1520 if (err)
1521 return err;
1523 *out = ranges.get_num_ranges ();
1524 return NULL;
1527 /* Selftests of location handling. */
1529 /* Helper function for verifying location data: when location_t
1530 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1531 as having column 0. */
1533 static bool
1534 should_have_column_data_p (location_t loc)
1536 if (IS_ADHOC_LOC (loc))
1537 loc = get_location_from_adhoc_loc (line_table, loc);
1538 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1539 return false;
1540 return true;
1543 /* Selftest for should_have_column_data_p. */
1545 static void
1546 test_should_have_column_data_p ()
1548 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1549 ASSERT_TRUE
1550 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1551 ASSERT_FALSE
1552 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1555 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1556 on LOC. */
1558 static void
1559 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1560 location_t loc)
1562 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1563 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1564 /* If location_t values are sufficiently high, then column numbers
1565 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1566 When close to the threshold, column numbers *may* be present: if
1567 the final linemap before the threshold contains a line that straddles
1568 the threshold, locations in that line have column information. */
1569 if (should_have_column_data_p (loc))
1570 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for source_location/location_t beyond which line-map.c changes
     behavior (disabling of the range-packing optimization, disabling
     of column-tracking).  We can exercise these by starting the line_table
     at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix.  */

struct line_table_case
{
  /* Store DEFAULT_RANGE_BITS and BASE_LOCATION unchanged.  */
  line_table_case (int default_range_bits, int base_location)
  {
    m_default_range_bits = default_range_bits;
    m_base_location = base_location;
  }

  /* Value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the value at which to start the line_table's
     highest_location and highest_line.  */
  int m_base_location;
};
1600 /* Constructor. Store the old value of line_table, and create a new
1601 one, using sane defaults. */
1603 line_table_test::line_table_test ()
1605 gcc_assert (saved_line_table == NULL);
1606 saved_line_table = line_table;
1607 line_table = ggc_alloc<line_maps> ();
1608 linemap_init (line_table, BUILTINS_LOCATION);
1609 gcc_assert (saved_line_table->reallocator);
1610 line_table->reallocator = saved_line_table->reallocator;
1611 gcc_assert (saved_line_table->round_alloc_size);
1612 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1613 line_table->default_range_bits = 0;
1616 /* Constructor. Store the old value of line_table, and create a new
1617 one, using the sitation described in CASE_. */
1619 line_table_test::line_table_test (const line_table_case &case_)
1621 gcc_assert (saved_line_table == NULL);
1622 saved_line_table = line_table;
1623 line_table = ggc_alloc<line_maps> ();
1624 linemap_init (line_table, BUILTINS_LOCATION);
1625 gcc_assert (saved_line_table->reallocator);
1626 line_table->reallocator = saved_line_table->reallocator;
1627 gcc_assert (saved_line_table->round_alloc_size);
1628 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1629 line_table->default_range_bits = case_.m_default_range_bits;
1630 if (case_.m_base_location)
1632 line_table->highest_location = case_.m_base_location;
1633 line_table->highest_line = case_.m_base_location;
1637 /* Destructor. Restore the old value of line_table. */
1639 line_table_test::~line_table_test ()
1641 gcc_assert (saved_line_table != NULL);
1642 line_table = saved_line_table;
1643 saved_line_table = NULL;
1646 /* Verify basic operation of ordinary linemaps. */
1648 static void
1649 test_accessing_ordinary_linemaps (const line_table_case &case_)
1651 line_table_test ltt (case_);
1653 /* Build a simple linemap describing some locations. */
1654 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1656 linemap_line_start (line_table, 1, 100);
1657 location_t loc_a = linemap_position_for_column (line_table, 1);
1658 location_t loc_b = linemap_position_for_column (line_table, 23);
1660 linemap_line_start (line_table, 2, 100);
1661 location_t loc_c = linemap_position_for_column (line_table, 1);
1662 location_t loc_d = linemap_position_for_column (line_table, 17);
1664 /* Example of a very long line. */
1665 linemap_line_start (line_table, 3, 2000);
1666 location_t loc_e = linemap_position_for_column (line_table, 700);
1668 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1670 /* Multiple files. */
1671 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1672 linemap_line_start (line_table, 1, 200);
1673 location_t loc_f = linemap_position_for_column (line_table, 150);
1674 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1676 /* Verify that we can recover the location info. */
1677 assert_loceq ("foo.c", 1, 1, loc_a);
1678 assert_loceq ("foo.c", 1, 23, loc_b);
1679 assert_loceq ("foo.c", 2, 1, loc_c);
1680 assert_loceq ("foo.c", 2, 17, loc_d);
1681 assert_loceq ("foo.c", 3, 700, loc_e);
1682 assert_loceq ("bar.c", 1, 150, loc_f);
1684 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1685 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1687 /* Verify using make_location to build a range, and extracting data
1688 back from it. */
1689 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1690 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1691 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1692 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1693 ASSERT_EQ (loc_b, src_range.m_start);
1694 ASSERT_EQ (loc_d, src_range.m_finish);
1697 /* Verify various properties of UNKNOWN_LOCATION. */
1699 static void
1700 test_unknown_location ()
1702 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1703 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1704 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1707 /* Verify various properties of BUILTINS_LOCATION. */
1709 static void
1710 test_builtins ()
1712 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1713 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1716 /* Regression test for make_location.
1717 Ensure that we use pure locations for the start/finish of the range,
1718 rather than storing a packed or ad-hoc range as the start/finish. */
1720 static void
1721 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1723 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1724 with C++ frontend.
1725 ....................0000000001111111111222.
1726 ....................1234567890123456789012. */
1727 const char *content = " r += !aaa == bbb;\n";
1728 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1729 line_table_test ltt (case_);
1730 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1732 const location_t c11 = linemap_position_for_column (line_table, 11);
1733 const location_t c12 = linemap_position_for_column (line_table, 12);
1734 const location_t c13 = linemap_position_for_column (line_table, 13);
1735 const location_t c14 = linemap_position_for_column (line_table, 14);
1736 const location_t c21 = linemap_position_for_column (line_table, 21);
1738 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1739 return;
1741 /* Use column 13 for the caret location, arbitrarily, to verify that we
1742 handle start != caret. */
1743 const location_t aaa = make_location (c13, c12, c14);
1744 ASSERT_EQ (c13, get_pure_location (aaa));
1745 ASSERT_EQ (c12, get_start (aaa));
1746 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1747 ASSERT_EQ (c14, get_finish (aaa));
1748 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1750 /* Make a location using a location with a range as the start-point. */
1751 const location_t not_aaa = make_location (c11, aaa, c14);
1752 ASSERT_EQ (c11, get_pure_location (not_aaa));
1753 /* It should use the start location of the range, not store the range
1754 itself. */
1755 ASSERT_EQ (c12, get_start (not_aaa));
1756 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1757 ASSERT_EQ (c14, get_finish (not_aaa));
1758 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1760 /* Similarly, make a location with a range as the end-point. */
1761 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1762 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1763 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1764 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1765 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1766 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1767 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1768 /* It should use the finish location of the range, not store the range
1769 itself. */
1770 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1771 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1772 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1773 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1774 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1777 /* Verify reading of input files (e.g. for caret-based diagnostics). */
1779 static void
1780 test_reading_source_line ()
1782 /* Create a tempfile and write some text to it. */
1783 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1784 "01234567890123456789\n"
1785 "This is the test text\n"
1786 "This is the 3rd line");
1788 /* Read back a specific line from the tempfile. */
1789 int line_size;
1790 const char *source_line = location_get_source_line (tmp.get_filename (),
1791 3, &line_size);
1792 ASSERT_TRUE (source_line != NULL);
1793 ASSERT_EQ (20, line_size);
1794 ASSERT_TRUE (!strncmp ("This is the 3rd line",
1795 source_line, line_size));
1797 source_line = location_get_source_line (tmp.get_filename (),
1798 2, &line_size);
1799 ASSERT_TRUE (source_line != NULL);
1800 ASSERT_EQ (21, line_size);
1801 ASSERT_TRUE (!strncmp ("This is the test text",
1802 source_line, line_size));
1804 source_line = location_get_source_line (tmp.get_filename (),
1805 4, &line_size);
1806 ASSERT_TRUE (source_line == NULL);
1809 /* Tests of lexing. */
/* Verify that the text which cpp_token_as_text gives for token TOK from
   PARSER is equal (as a string) to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)		\
  SELFTEST_BEGIN_STMT							\
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));	\
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);		\
  SELFTEST_END_STMT
1820 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1821 and ranges from EXP_START_COL to EXP_FINISH_COL.
1822 Use LOC as the effective location of the selftest. */
1824 static void
1825 assert_token_loc_eq (const location &loc,
1826 const cpp_token *tok,
1827 const char *exp_filename, int exp_linenum,
1828 int exp_start_col, int exp_finish_col)
1830 location_t tok_loc = tok->src_loc;
1831 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1832 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1834 /* If location_t values are sufficiently high, then column numbers
1835 will be unavailable. */
1836 if (!should_have_column_data_p (tok_loc))
1837 return;
1839 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1840 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1841 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1842 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
/* Check TOK->src_loc via assert_token_loc_eq, reporting any failure at
   the point where this macro is used (SELFTEST_LOCATION).  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,		\
			    EXP_START_COL, EXP_FINISH_COL)		\
  assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME),	\
		       (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1853 /* Test of lexing a file using libcpp, verifying tokens and their
1854 location information. */
1856 static void
1857 test_lexer (const line_table_case &case_)
1859 /* Create a tempfile and write some text to it. */
1860 const char *content =
1861 /*00000000011111111112222222222333333.3333444444444.455555555556
1862 12345678901234567890123456789012345.6789012345678.901234567890. */
1863 ("test_name /* c-style comment */\n"
1864 " \"test literal\"\n"
1865 " // test c++-style comment\n"
1866 " 42\n");
1867 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1869 line_table_test ltt (case_);
1871 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1873 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1874 ASSERT_NE (fname, NULL);
1876 /* Verify that we get the expected tokens back, with the correct
1877 location information. */
1879 location_t loc;
1880 const cpp_token *tok;
1881 tok = cpp_get_token_with_location (parser, &loc);
1882 ASSERT_NE (tok, NULL);
1883 ASSERT_EQ (tok->type, CPP_NAME);
1884 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
1885 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
1887 tok = cpp_get_token_with_location (parser, &loc);
1888 ASSERT_NE (tok, NULL);
1889 ASSERT_EQ (tok->type, CPP_STRING);
1890 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
1891 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
1893 tok = cpp_get_token_with_location (parser, &loc);
1894 ASSERT_NE (tok, NULL);
1895 ASSERT_EQ (tok->type, CPP_NUMBER);
1896 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
1897 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
1899 tok = cpp_get_token_with_location (parser, &loc);
1900 ASSERT_NE (tok, NULL);
1901 ASSERT_EQ (tok->type, CPP_EOF);
1903 cpp_finish (parser, NULL);
1904 cpp_destroy (parser);
/* Forward decls.  */

struct lexer_test;
class lexer_test_options;

/* Abstract interface for customizing a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor, with
   the lexer_test under construction as its argument.  */

class lexer_test_options
{
 public:
  virtual void apply (lexer_test &) = 0;
};
1921 /* A struct for writing lexer tests. */
1923 struct lexer_test
1925 lexer_test (const line_table_case &case_, const char *content,
1926 lexer_test_options *options);
1927 ~lexer_test ();
1929 const cpp_token *get_token ();
1931 temp_source_file m_tempfile;
1932 line_table_test m_ltt;
1933 cpp_reader *m_parser;
1934 string_concat_db m_concats;
1937 /* Use an EBCDIC encoding for the execution charset, specifically
1938 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
1940 This exercises iconv integration within libcpp.
1941 Not every build of iconv supports the given charset,
1942 so we need to flag this error and handle it gracefully. */
1944 class ebcdic_execution_charset : public lexer_test_options
1946 public:
1947 ebcdic_execution_charset () : m_num_iconv_errors (0)
1949 gcc_assert (s_singleton == NULL);
1950 s_singleton = this;
1952 ~ebcdic_execution_charset ()
1954 gcc_assert (s_singleton == this);
1955 s_singleton = NULL;
1958 void apply (lexer_test &test) FINAL OVERRIDE
1960 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
1961 cpp_opts->narrow_charset = "IBM1047";
1963 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
1964 callbacks->error = on_error;
1967 static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
1968 int level ATTRIBUTE_UNUSED,
1969 int reason ATTRIBUTE_UNUSED,
1970 rich_location *richloc ATTRIBUTE_UNUSED,
1971 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
1972 ATTRIBUTE_FPTR_PRINTF(5,0)
1974 gcc_assert (s_singleton);
1975 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
1976 when the local iconv build doesn't support the conversion. */
1977 if (strstr (msgid, "not supported by iconv"))
1979 s_singleton->m_num_iconv_errors++;
1980 return true;
1983 /* Otherwise, we have an unexpected error. */
1984 abort ();
1987 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
1989 private:
1990 static ebcdic_execution_charset *s_singleton;
1991 int m_num_iconv_errors;
1994 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
1996 /* Constructor. Override line_table with a new instance based on CASE_,
1997 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
1998 start parsing the tempfile. */
2000 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2001 lexer_test_options *options) :
2002 /* Create a tempfile and write the text to it. */
2003 m_tempfile (SELFTEST_LOCATION, ".c", content),
2004 m_ltt (case_),
2005 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2006 m_concats ()
2008 if (options)
2009 options->apply (*this);
2011 cpp_init_iconv (m_parser);
2013 /* Parse the file. */
2014 const char *fname = cpp_read_main_file (m_parser,
2015 m_tempfile.get_filename ());
2016 ASSERT_NE (fname, NULL);
2019 /* Destructor. Verify that the next token in m_parser is EOF. */
2021 lexer_test::~lexer_test ()
2023 location_t loc;
2024 const cpp_token *tok;
2026 tok = cpp_get_token_with_location (m_parser, &loc);
2027 ASSERT_NE (tok, NULL);
2028 ASSERT_EQ (tok->type, CPP_EOF);
2030 cpp_finish (m_parser, NULL);
2031 cpp_destroy (m_parser);
2034 /* Get the next token from m_parser. */
2036 const cpp_token *
2037 lexer_test::get_token ()
2039 location_t loc;
2040 const cpp_token *tok;
2042 tok = cpp_get_token_with_location (m_parser, &loc);
2043 ASSERT_NE (tok, NULL);
2044 return tok;
2047 /* Verify that locations within string literals are correctly handled. */
2049 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2050 using the string concatenation database for TEST.
2052 Assert that the character at index IDX is on EXPECTED_LINE,
2053 and that it begins at column EXPECTED_START_COL and ends at
2054 EXPECTED_FINISH_COL (unless the locations are beyond
2055 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2056 columns). */
2058 static void
2059 assert_char_at_range (const location &loc,
2060 lexer_test& test,
2061 location_t strloc, enum cpp_ttype type, int idx,
2062 int expected_line, int expected_start_col,
2063 int expected_finish_col)
2065 cpp_reader *pfile = test.m_parser;
2066 string_concat_db *concats = &test.m_concats;
2068 source_range actual_range;
2069 const char *err
2070 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2071 &actual_range);
2072 if (should_have_column_data_p (strloc))
2073 ASSERT_EQ_AT (loc, NULL, err);
2074 else
2076 ASSERT_STREQ_AT (loc,
2077 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2078 err);
2079 return;
2082 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2083 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2084 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2085 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2087 if (should_have_column_data_p (actual_range.m_start))
2089 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2090 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2092 if (should_have_column_data_p (actual_range.m_finish))
2094 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2095 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
/* Convenience wrapper around assert_char_at_range that passes
   SELFTEST_LOCATION, so that any failure is attributed to the line
   on which the macro is used.  */

#define ASSERT_CHAR_AT_RANGE(TEST_, LOC_, KIND_, INDEX_, LINE_,          \
                             START_COL_, FINISH_COL_)                    \
  assert_char_at_range (SELFTEST_LOCATION, (TEST_), (LOC_), (KIND_),     \
                        (INDEX_), (LINE_), (START_COL_), (FINISH_COL_))
2108 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2109 using the string concatenation database for TEST.
2111 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2113 static void
2114 assert_num_substring_ranges (const location &loc,
2115 lexer_test& test,
2116 location_t strloc,
2117 enum cpp_ttype type,
2118 int expected_num_ranges)
2120 cpp_reader *pfile = test.m_parser;
2121 string_concat_db *concats = &test.m_concats;
2123 int actual_num_ranges = -1;
2124 const char *err
2125 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2126 &actual_num_ranges);
2127 if (should_have_column_data_p (strloc))
2128 ASSERT_EQ_AT (loc, NULL, err);
2129 else
2131 ASSERT_STREQ_AT (loc,
2132 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2133 err);
2134 return;
2136 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
/* Convenience wrapper around assert_num_substring_ranges that passes
   SELFTEST_LOCATION, so that any failure is attributed to the line
   on which the macro is used.  */

#define ASSERT_NUM_SUBSTRING_RANGES(TEST_, LOC_, KIND_, COUNT_)          \
  assert_num_substring_ranges (SELFTEST_LOCATION, (TEST_), (LOC_),       \
                               (KIND_), (COUNT_))
2148 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2149 returns an error (using the string concatenation database for TEST). */
2151 static void
2152 assert_has_no_substring_ranges (const location &loc,
2153 lexer_test& test,
2154 location_t strloc,
2155 enum cpp_ttype type,
2156 const char *expected_err)
2158 cpp_reader *pfile = test.m_parser;
2159 string_concat_db *concats = &test.m_concats;
2160 cpp_substring_ranges ranges;
2161 const char *actual_err
2162 = get_substring_ranges_for_loc (pfile, concats, strloc,
2163 type, ranges);
2164 if (should_have_column_data_p (strloc))
2165 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2166 else
2167 ASSERT_STREQ_AT (loc,
2168 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2169 actual_err);
/* Convenience wrapper around assert_has_no_substring_ranges that passes
   SELFTEST_LOCATION, so that any failure is attributed to the line
   on which the macro is used.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(TEST_, LOC_, KIND_, MESSAGE_)     \
  assert_has_no_substring_ranges (SELFTEST_LOCATION, (TEST_), (LOC_),    \
                                  (KIND_), (MESSAGE_))
2176 /* Lex a simple string literal. Verify the substring location data, before
2177 and after running cpp_interpret_string on it. */
2179 static void
2180 test_lexer_string_locations_simple (const line_table_case &case_)
2182 /* Digits 0-9 (with 0 at column 10), the simple way.
2183 ....................000000000.11111111112.2222222223333333333
2184 ....................123456789.01234567890.1234567890123456789
2185 We add a trailing comment to ensure that we correctly locate
2186 the end of the string literal token. */
2187 const char *content = " \"0123456789\" /* not a string */\n";
2188 lexer_test test (case_, content, NULL);
2190 /* Verify that we get the expected token back, with the correct
2191 location information. */
2192 const cpp_token *tok = test.get_token ();
2193 ASSERT_EQ (tok->type, CPP_STRING);
2194 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2195 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2197 /* At this point in lexing, the quote characters are treated as part of
2198 the string (they are stripped off by cpp_interpret_string). */
2200 ASSERT_EQ (tok->val.str.len, 12);
2202 /* Verify that cpp_interpret_string works. */
2203 cpp_string dst_string;
2204 const enum cpp_ttype type = CPP_STRING;
2205 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2206 &dst_string, type);
2207 ASSERT_TRUE (result);
2208 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2209 free (const_cast <unsigned char *> (dst_string.text));
2211 /* Verify ranges of individual characters. This no longer includes the
2212 opening quote, but does include the closing quote. */
2213 for (int i = 0; i <= 10; i++)
2214 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2215 10 + i, 10 + i);
2217 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2220 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2221 encoding. */
2223 static void
2224 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2226 /* EBCDIC support requires iconv. */
2227 if (!HAVE_ICONV)
2228 return;
2230 /* Digits 0-9 (with 0 at column 10), the simple way.
2231 ....................000000000.11111111112.2222222223333333333
2232 ....................123456789.01234567890.1234567890123456789
2233 We add a trailing comment to ensure that we correctly locate
2234 the end of the string literal token. */
2235 const char *content = " \"0123456789\" /* not a string */\n";
2236 ebcdic_execution_charset use_ebcdic;
2237 lexer_test test (case_, content, &use_ebcdic);
2239 /* Verify that we get the expected token back, with the correct
2240 location information. */
2241 const cpp_token *tok = test.get_token ();
2242 ASSERT_EQ (tok->type, CPP_STRING);
2243 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2244 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2246 /* At this point in lexing, the quote characters are treated as part of
2247 the string (they are stripped off by cpp_interpret_string). */
2249 ASSERT_EQ (tok->val.str.len, 12);
2251 /* The remainder of the test requires an iconv implementation that
2252 can convert from UTF-8 to the EBCDIC encoding requested above. */
2253 if (use_ebcdic.iconv_errors_occurred_p ())
2254 return;
2256 /* Verify that cpp_interpret_string works. */
2257 cpp_string dst_string;
2258 const enum cpp_ttype type = CPP_STRING;
2259 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2260 &dst_string, type);
2261 ASSERT_TRUE (result);
2262 /* We should now have EBCDIC-encoded text, specifically
2263 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2264 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2265 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2266 (const char *)dst_string.text);
2267 free (const_cast <unsigned char *> (dst_string.text));
2269 /* Verify that we don't attempt to record substring location information
2270 for such cases. */
2271 ASSERT_HAS_NO_SUBSTRING_RANGES
2272 (test, tok->src_loc, type,
2273 "execution character set != source character set");
2276 /* Lex a string literal containing a hex-escaped character.
2277 Verify the substring location data, before and after running
2278 cpp_interpret_string on it. */
2280 static void
2281 test_lexer_string_locations_hex (const line_table_case &case_)
2283 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2284 and with a space in place of digit 6, to terminate the escaped
2285 hex code.
2286 ....................000000000.111111.11112222.
2287 ....................123456789.012345.67890123. */
2288 const char *content = " \"01234\\x35 789\"\n";
2289 lexer_test test (case_, content, NULL);
2291 /* Verify that we get the expected token back, with the correct
2292 location information. */
2293 const cpp_token *tok = test.get_token ();
2294 ASSERT_EQ (tok->type, CPP_STRING);
2295 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2296 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2298 /* At this point in lexing, the quote characters are treated as part of
2299 the string (they are stripped off by cpp_interpret_string). */
2300 ASSERT_EQ (tok->val.str.len, 15);
2302 /* Verify that cpp_interpret_string works. */
2303 cpp_string dst_string;
2304 const enum cpp_ttype type = CPP_STRING;
2305 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2306 &dst_string, type);
2307 ASSERT_TRUE (result);
2308 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2309 free (const_cast <unsigned char *> (dst_string.text));
2311 /* Verify ranges of individual characters. This no longer includes the
2312 opening quote, but does include the closing quote. */
2313 for (int i = 0; i <= 4; i++)
2314 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2315 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2316 for (int i = 6; i <= 10; i++)
2317 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2319 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2322 /* Lex a string literal containing an octal-escaped character.
2323 Verify the substring location data after running cpp_interpret_string
2324 on it. */
2326 static void
2327 test_lexer_string_locations_oct (const line_table_case &case_)
2329 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2330 and with a space in place of digit 6, to terminate the escaped
2331 octal code.
2332 ....................000000000.111111.11112222.2222223333333333444
2333 ....................123456789.012345.67890123.4567890123456789012 */
2334 const char *content = " \"01234\\065 789\" /* not a string */\n";
2335 lexer_test test (case_, content, NULL);
2337 /* Verify that we get the expected token back, with the correct
2338 location information. */
2339 const cpp_token *tok = test.get_token ();
2340 ASSERT_EQ (tok->type, CPP_STRING);
2341 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2343 /* Verify that cpp_interpret_string works. */
2344 cpp_string dst_string;
2345 const enum cpp_ttype type = CPP_STRING;
2346 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2347 &dst_string, type);
2348 ASSERT_TRUE (result);
2349 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2350 free (const_cast <unsigned char *> (dst_string.text));
2352 /* Verify ranges of individual characters. This no longer includes the
2353 opening quote, but does include the closing quote. */
2354 for (int i = 0; i < 5; i++)
2355 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2356 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2357 for (int i = 6; i <= 10; i++)
2358 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2360 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2363 /* Test of string literal containing letter escapes. */
2365 static void
2366 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2368 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2369 .....................000000000.1.11111.1.1.11222.22222223333333
2370 .....................123456789.0.12345.6.7.89012.34567890123456. */
2371 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2372 lexer_test test (case_, content, NULL);
2374 /* Verify that we get the expected tokens back. */
2375 const cpp_token *tok = test.get_token ();
2376 ASSERT_EQ (tok->type, CPP_STRING);
2377 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2379 /* Verify ranges of individual characters. */
2380 /* "\t". */
2381 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2382 0, 1, 10, 11);
2383 /* "foo". */
2384 for (int i = 1; i <= 3; i++)
2385 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2386 i, 1, 11 + i, 11 + i);
2387 /* "\\" and "\n". */
2388 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2389 4, 1, 15, 16);
2390 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2391 5, 1, 17, 18);
2393 /* "bar" and closing quote for nul-terminator. */
2394 for (int i = 6; i <= 9; i++)
2395 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2396 i, 1, 13 + i, 13 + i);
2398 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2401 /* Another test of a string literal containing a letter escape.
2402 Based on string seen in
2403 printf ("%-%\n");
2404 in gcc.dg/format/c90-printf-1.c. */
2406 static void
2407 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2409 /* .....................000000000.1111.11.1111.22222222223.
2410 .....................123456789.0123.45.6789.01234567890. */
2411 const char *content = (" \"%-%\\n\" /* non-str */\n");
2412 lexer_test test (case_, content, NULL);
2414 /* Verify that we get the expected tokens back. */
2415 const cpp_token *tok = test.get_token ();
2416 ASSERT_EQ (tok->type, CPP_STRING);
2417 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2419 /* Verify ranges of individual characters. */
2420 /* "%-%". */
2421 for (int i = 0; i < 3; i++)
2422 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2423 i, 1, 10 + i, 10 + i);
2424 /* "\n". */
2425 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2426 3, 1, 13, 14);
2428 /* Closing quote for nul-terminator. */
2429 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2430 4, 1, 15, 15);
2432 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2435 /* Lex a string literal containing UCN 4 characters.
2436 Verify the substring location data after running cpp_interpret_string
2437 on it. */
2439 static void
2440 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2442 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2443 as UCN 4.
2444 ....................000000000.111111.111122.222222223.33333333344444
2445 ....................123456789.012345.678901.234567890.12345678901234 */
2446 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2447 lexer_test test (case_, content, NULL);
2449 /* Verify that we get the expected token back, with the correct
2450 location information. */
2451 const cpp_token *tok = test.get_token ();
2452 ASSERT_EQ (tok->type, CPP_STRING);
2453 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2455 /* Verify that cpp_interpret_string works.
2456 The string should be encoded in the execution character
2457 set. Assuming that that is UTF-8, we should have the following:
2458 ----------- ---- ----- ------- ----------------
2459 Byte offset Byte Octal Unicode Source Column(s)
2460 ----------- ---- ----- ------- ----------------
2461 0 0x30 '0' 10
2462 1 0x31 '1' 11
2463 2 0x32 '2' 12
2464 3 0x33 '3' 13
2465 4 0x34 '4' 14
2466 5 0xE2 \342 U+2174 15-20
2467 6 0x85 \205 (cont) 15-20
2468 7 0xB4 \264 (cont) 15-20
2469 8 0xE2 \342 U+2175 21-26
2470 9 0x85 \205 (cont) 21-26
2471 10 0xB5 \265 (cont) 21-26
2472 11 0x37 '7' 27
2473 12 0x38 '8' 28
2474 13 0x39 '9' 29
2475 14 0x00 30 (closing quote)
2476 ----------- ---- ----- ------- ---------------. */
2478 cpp_string dst_string;
2479 const enum cpp_ttype type = CPP_STRING;
2480 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2481 &dst_string, type);
2482 ASSERT_TRUE (result);
2483 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2484 (const char *)dst_string.text);
2485 free (const_cast <unsigned char *> (dst_string.text));
2487 /* Verify ranges of individual characters. This no longer includes the
2488 opening quote, but does include the closing quote.
2489 '01234'. */
2490 for (int i = 0; i <= 4; i++)
2491 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2492 /* U+2174. */
2493 for (int i = 5; i <= 7; i++)
2494 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2495 /* U+2175. */
2496 for (int i = 8; i <= 10; i++)
2497 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2498 /* '789' and nul terminator */
2499 for (int i = 11; i <= 14; i++)
2500 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2502 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2505 /* Lex a string literal containing UCN 8 characters.
2506 Verify the substring location data after running cpp_interpret_string
2507 on it. */
2509 static void
2510 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2512 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2513 ....................000000000.111111.1111222222.2222333333333.344444
2514 ....................123456789.012345.6789012345.6789012345678.901234 */
2515 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2516 lexer_test test (case_, content, NULL);
2518 /* Verify that we get the expected token back, with the correct
2519 location information. */
2520 const cpp_token *tok = test.get_token ();
2521 ASSERT_EQ (tok->type, CPP_STRING);
2522 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2523 "\"01234\\U00002174\\U00002175789\"");
2525 /* Verify that cpp_interpret_string works.
2526 The UTF-8 encoding of the string is identical to that from
2527 the ucn4 testcase above; the only difference is the column
2528 locations. */
2529 cpp_string dst_string;
2530 const enum cpp_ttype type = CPP_STRING;
2531 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2532 &dst_string, type);
2533 ASSERT_TRUE (result);
2534 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2535 (const char *)dst_string.text);
2536 free (const_cast <unsigned char *> (dst_string.text));
2538 /* Verify ranges of individual characters. This no longer includes the
2539 opening quote, but does include the closing quote.
2540 '01234'. */
2541 for (int i = 0; i <= 4; i++)
2542 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2543 /* U+2174. */
2544 for (int i = 5; i <= 7; i++)
2545 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2546 /* U+2175. */
2547 for (int i = 8; i <= 10; i++)
2548 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2549 /* '789' at columns 35-37 */
2550 for (int i = 11; i <= 13; i++)
2551 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2552 /* Closing quote/nul-terminator at column 38. */
2553 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2555 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
/* Read the four bytes at PTR_BE_VALUE as a big-endian 32-bit quantity
   (most significant byte first) and return it as a host-order value.
   The bytes are read individually, so the result does not depend on
   the endianness of the host.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t value = 0;

  /* Shift in one byte at a time, most significant first.  */
  for (int i = 0; i < 4; i++)
    value = (value << 8) | (uint32_t) bytes[i];

  return value;
}
2570 /* Lex a wide string literal and verify that attempts to read substring
2571 location data from it fail gracefully. */
2573 static void
2574 test_lexer_string_locations_wide_string (const line_table_case &case_)
2576 /* Digits 0-9.
2577 ....................000000000.11111111112.22222222233333
2578 ....................123456789.01234567890.12345678901234 */
2579 const char *content = " L\"0123456789\" /* non-str */\n";
2580 lexer_test test (case_, content, NULL);
2582 /* Verify that we get the expected token back, with the correct
2583 location information. */
2584 const cpp_token *tok = test.get_token ();
2585 ASSERT_EQ (tok->type, CPP_WSTRING);
2586 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2588 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2589 cpp_string dst_string;
2590 const enum cpp_ttype type = CPP_WSTRING;
2591 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2592 &dst_string, type);
2593 ASSERT_TRUE (result);
2594 /* The cpp_reader defaults to big-endian with
2595 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2596 now be encoded as UTF-32BE. */
2597 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2598 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2599 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2600 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2601 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2602 free (const_cast <unsigned char *> (dst_string.text));
2604 /* We don't yet support generating substring location information
2605 for L"" strings. */
2606 ASSERT_HAS_NO_SUBSTRING_RANGES
2607 (test, tok->src_loc, type,
2608 "execution character set != source character set");
/* Read the two bytes at PTR_BE_VALUE as a big-endian 16-bit quantity
   (most significant byte first) and return it as a host-order value.
   The bytes are read individually, so the result does not depend on
   the endianness of the host.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high = bytes[0];
  const unsigned int low = bytes[1];

  return (uint16_t) ((high << 8) | low);
}
2620 /* Lex a u"" string literal and verify that attempts to read substring
2621 location data from it fail gracefully. */
2623 static void
2624 test_lexer_string_locations_string16 (const line_table_case &case_)
2626 /* Digits 0-9.
2627 ....................000000000.11111111112.22222222233333
2628 ....................123456789.01234567890.12345678901234 */
2629 const char *content = " u\"0123456789\" /* non-str */\n";
2630 lexer_test test (case_, content, NULL);
2632 /* Verify that we get the expected token back, with the correct
2633 location information. */
2634 const cpp_token *tok = test.get_token ();
2635 ASSERT_EQ (tok->type, CPP_STRING16);
2636 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2638 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2639 cpp_string dst_string;
2640 const enum cpp_ttype type = CPP_STRING16;
2641 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2642 &dst_string, type);
2643 ASSERT_TRUE (result);
2645 /* The cpp_reader defaults to big-endian, so dst_string should
2646 now be encoded as UTF-16BE. */
2647 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2648 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2649 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2650 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2651 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2652 free (const_cast <unsigned char *> (dst_string.text));
2654 /* We don't yet support generating substring location information
2655 for L"" strings. */
2656 ASSERT_HAS_NO_SUBSTRING_RANGES
2657 (test, tok->src_loc, type,
2658 "execution character set != source character set");
2661 /* Lex a U"" string literal and verify that attempts to read substring
2662 location data from it fail gracefully. */
2664 static void
2665 test_lexer_string_locations_string32 (const line_table_case &case_)
2667 /* Digits 0-9.
2668 ....................000000000.11111111112.22222222233333
2669 ....................123456789.01234567890.12345678901234 */
2670 const char *content = " U\"0123456789\" /* non-str */\n";
2671 lexer_test test (case_, content, NULL);
2673 /* Verify that we get the expected token back, with the correct
2674 location information. */
2675 const cpp_token *tok = test.get_token ();
2676 ASSERT_EQ (tok->type, CPP_STRING32);
2677 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2679 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2680 cpp_string dst_string;
2681 const enum cpp_ttype type = CPP_STRING32;
2682 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2683 &dst_string, type);
2684 ASSERT_TRUE (result);
2686 /* The cpp_reader defaults to big-endian, so dst_string should
2687 now be encoded as UTF-32BE. */
2688 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2689 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2690 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2691 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2692 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2693 free (const_cast <unsigned char *> (dst_string.text));
2695 /* We don't yet support generating substring location information
2696 for L"" strings. */
2697 ASSERT_HAS_NO_SUBSTRING_RANGES
2698 (test, tok->src_loc, type,
2699 "execution character set != source character set");
2702 /* Lex a u8-string literal.
2703 Verify the substring location data after running cpp_interpret_string
2704 on it. */
2706 static void
2707 test_lexer_string_locations_u8 (const line_table_case &case_)
2709 /* Digits 0-9.
2710 ....................000000000.11111111112.22222222233333
2711 ....................123456789.01234567890.12345678901234 */
2712 const char *content = " u8\"0123456789\" /* non-str */\n";
2713 lexer_test test (case_, content, NULL);
2715 /* Verify that we get the expected token back, with the correct
2716 location information. */
2717 const cpp_token *tok = test.get_token ();
2718 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2719 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2721 /* Verify that cpp_interpret_string works. */
2722 cpp_string dst_string;
2723 const enum cpp_ttype type = CPP_STRING;
2724 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2725 &dst_string, type);
2726 ASSERT_TRUE (result);
2727 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2728 free (const_cast <unsigned char *> (dst_string.text));
2730 /* Verify ranges of individual characters. This no longer includes the
2731 opening quote, but does include the closing quote. */
2732 for (int i = 0; i <= 10; i++)
2733 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2736 /* Lex a string literal containing UTF-8 source characters.
2737 Verify the substring location data after running cpp_interpret_string
2738 on it. */
2740 static void
2741 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2743 /* This string literal is written out to the source file as UTF-8,
2744 and is of the form "before mojibake after", where "mojibake"
2745 is written as the following four unicode code points:
2746 U+6587 CJK UNIFIED IDEOGRAPH-6587
2747 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2748 U+5316 CJK UNIFIED IDEOGRAPH-5316
2749 U+3051 HIRAGANA LETTER KE.
2750 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2751 "before" and "after" are 1 byte per unicode character.
2753 The numbering shown are "columns", which are *byte* numbers within
2754 the line, rather than unicode character numbers.
2756 .................... 000000000.1111111.
2757 .................... 123456789.0123456. */
2758 const char *content = (" \"before "
2759 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2760 UTF-8: 0xE6 0x96 0x87
2761 C octal escaped UTF-8: \346\226\207
2762 "column" numbers: 17-19. */
2763 "\346\226\207"
2765 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2766 UTF-8: 0xE5 0xAD 0x97
2767 C octal escaped UTF-8: \345\255\227
2768 "column" numbers: 20-22. */
2769 "\345\255\227"
2771 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2772 UTF-8: 0xE5 0x8C 0x96
2773 C octal escaped UTF-8: \345\214\226
2774 "column" numbers: 23-25. */
2775 "\345\214\226"
2777 /* U+3051 HIRAGANA LETTER KE
2778 UTF-8: 0xE3 0x81 0x91
2779 C octal escaped UTF-8: \343\201\221
2780 "column" numbers: 26-28. */
2781 "\343\201\221"
2783 /* column numbers 29 onwards
2784 2333333.33334444444444
2785 9012345.67890123456789. */
2786 " after\" /* non-str */\n");
2787 lexer_test test (case_, content, NULL);
2789 /* Verify that we get the expected token back, with the correct
2790 location information. */
2791 const cpp_token *tok = test.get_token ();
2792 ASSERT_EQ (tok->type, CPP_STRING);
2793 ASSERT_TOKEN_AS_TEXT_EQ
2794 (test.m_parser, tok,
2795 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2797 /* Verify that cpp_interpret_string works. */
2798 cpp_string dst_string;
2799 const enum cpp_ttype type = CPP_STRING;
2800 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2801 &dst_string, type);
2802 ASSERT_TRUE (result);
2803 ASSERT_STREQ
2804 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2805 (const char *)dst_string.text);
2806 free (const_cast <unsigned char *> (dst_string.text));
2808 /* Verify ranges of individual characters. This no longer includes the
2809 opening quote, but does include the closing quote.
2810 Assuming that both source and execution encodings are UTF-8, we have
2811 a run of 25 octets in each, plus the NUL terminator. */
2812 for (int i = 0; i < 25; i++)
2813 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2814 /* NUL-terminator should use the closing quote at column 35. */
2815 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
2817 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
2820 /* Test of string literal concatenation. */
2822 static void
2823 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
2825 /* Digits 0-9.
2826 .....................000000000.111111.11112222222222
2827 .....................123456789.012345.67890123456789. */
2828 const char *content = (" \"01234\" /* non-str */\n"
2829 " \"56789\" /* non-str */\n");
2830 lexer_test test (case_, content, NULL);
2832 location_t input_locs[2];
2834 /* Verify that we get the expected tokens back. */
2835 auto_vec <cpp_string> input_strings;
2836 const cpp_token *tok_a = test.get_token ();
2837 ASSERT_EQ (tok_a->type, CPP_STRING);
2838 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
2839 input_strings.safe_push (tok_a->val.str);
2840 input_locs[0] = tok_a->src_loc;
2842 const cpp_token *tok_b = test.get_token ();
2843 ASSERT_EQ (tok_b->type, CPP_STRING);
2844 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
2845 input_strings.safe_push (tok_b->val.str);
2846 input_locs[1] = tok_b->src_loc;
2848 /* Verify that cpp_interpret_string works. */
2849 cpp_string dst_string;
2850 const enum cpp_ttype type = CPP_STRING;
2851 bool result = cpp_interpret_string (test.m_parser,
2852 input_strings.address (), 2,
2853 &dst_string, type);
2854 ASSERT_TRUE (result);
2855 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2856 free (const_cast <unsigned char *> (dst_string.text));
2858 /* Simulate c-lex.c's lex_string in order to record concatenation. */
2859 test.m_concats.record_string_concatenation (2, input_locs);
2861 location_t initial_loc = input_locs[0];
2863 /* "01234" on line 1. */
2864 for (int i = 0; i <= 4; i++)
2865 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
2866 /* "56789" in line 2, plus its closing quote for the nul terminator. */
2867 for (int i = 5; i <= 10; i++)
2868 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
2870 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
2873 /* Another test of string literal concatenation. */
2875 static void
2876 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
2878 /* Digits 0-9.
2879 .....................000000000.111.11111112222222
2880 .....................123456789.012.34567890123456. */
2881 const char *content = (" \"01\" /* non-str */\n"
2882 " \"23\" /* non-str */\n"
2883 " \"45\" /* non-str */\n"
2884 " \"67\" /* non-str */\n"
2885 " \"89\" /* non-str */\n");
2886 lexer_test test (case_, content, NULL);
2888 auto_vec <cpp_string> input_strings;
2889 location_t input_locs[5];
2891 /* Verify that we get the expected tokens back. */
2892 for (int i = 0; i < 5; i++)
2894 const cpp_token *tok = test.get_token ();
2895 ASSERT_EQ (tok->type, CPP_STRING);
2896 input_strings.safe_push (tok->val.str);
2897 input_locs[i] = tok->src_loc;
2900 /* Verify that cpp_interpret_string works. */
2901 cpp_string dst_string;
2902 const enum cpp_ttype type = CPP_STRING;
2903 bool result = cpp_interpret_string (test.m_parser,
2904 input_strings.address (), 5,
2905 &dst_string, type);
2906 ASSERT_TRUE (result);
2907 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2908 free (const_cast <unsigned char *> (dst_string.text));
2910 /* Simulate c-lex.c's lex_string in order to record concatenation. */
2911 test.m_concats.record_string_concatenation (5, input_locs);
2913 location_t initial_loc = input_locs[0];
2915 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
2916 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
2917 and expect get_source_range_for_substring to fail.
2918 However, for a string concatenation test, we can have a case
2919 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
2920 but subsequent strings can be after it.
2921 Attempting to detect this within assert_char_at_range
2922 would overcomplicate the logic for the common test cases, so
2923 we detect it here. */
2924 if (should_have_column_data_p (input_locs[0])
2925 && !should_have_column_data_p (input_locs[4]))
2927 /* Verify that get_source_range_for_substring gracefully rejects
2928 this case. */
2929 source_range actual_range;
2930 const char *err
2931 = get_source_range_for_char (test.m_parser, &test.m_concats,
2932 initial_loc, type, 0, &actual_range);
2933 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
2934 return;
2937 for (int i = 0; i < 5; i++)
2938 for (int j = 0; j < 2; j++)
2939 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
2940 i + 1, 10 + j, 10 + j);
2942 /* NUL-terminator should use the final closing quote at line 5 column 12. */
2943 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
2945 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
2948 /* Another test of string literal concatenation, this time combined with
2949 various kinds of escaped characters. */
2951 static void
2952 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
2954 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
2955 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
2956 const char *content
2957 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
2958 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
2959 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
2960 lexer_test test (case_, content, NULL);
2962 auto_vec <cpp_string> input_strings;
2963 location_t input_locs[4];
2965 /* Verify that we get the expected tokens back. */
2966 for (int i = 0; i < 4; i++)
2968 const cpp_token *tok = test.get_token ();
2969 ASSERT_EQ (tok->type, CPP_STRING);
2970 input_strings.safe_push (tok->val.str);
2971 input_locs[i] = tok->src_loc;
2974 /* Verify that cpp_interpret_string works. */
2975 cpp_string dst_string;
2976 const enum cpp_ttype type = CPP_STRING;
2977 bool result = cpp_interpret_string (test.m_parser,
2978 input_strings.address (), 4,
2979 &dst_string, type);
2980 ASSERT_TRUE (result);
2981 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2982 free (const_cast <unsigned char *> (dst_string.text));
2984 /* Simulate c-lex.c's lex_string in order to record concatenation. */
2985 test.m_concats.record_string_concatenation (4, input_locs);
2987 location_t initial_loc = input_locs[0];
2989 for (int i = 0; i <= 4; i++)
2990 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
2991 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
2992 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
2993 for (int i = 7; i <= 9; i++)
2994 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
2996 /* NUL-terminator should use the location of the final closing quote. */
2997 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
2999 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3002 /* Test of string literal in a macro. */
3004 static void
3005 test_lexer_string_locations_macro (const line_table_case &case_)
3007 /* Digits 0-9.
3008 .....................0000000001111111111.22222222223.
3009 .....................1234567890123456789.01234567890. */
3010 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3011 " MACRO");
3012 lexer_test test (case_, content, NULL);
3014 /* Verify that we get the expected tokens back. */
3015 const cpp_token *tok = test.get_token ();
3016 ASSERT_EQ (tok->type, CPP_PADDING);
3018 tok = test.get_token ();
3019 ASSERT_EQ (tok->type, CPP_STRING);
3020 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3022 /* Verify ranges of individual characters. We ought to
3023 see columns within the macro definition. */
3024 for (int i = 0; i <= 10; i++)
3025 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3026 i, 1, 20 + i, 20 + i);
3028 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3030 tok = test.get_token ();
3031 ASSERT_EQ (tok->type, CPP_PADDING);
3034 /* Test of stringification of a macro argument. */
3036 static void
3037 test_lexer_string_locations_stringified_macro_argument
3038 (const line_table_case &case_)
3040 /* .....................000000000111111111122222222223.
3041 .....................123456789012345678901234567890. */
3042 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3043 "MACRO(foo)\n");
3044 lexer_test test (case_, content, NULL);
3046 /* Verify that we get the expected token back. */
3047 const cpp_token *tok = test.get_token ();
3048 ASSERT_EQ (tok->type, CPP_PADDING);
3050 tok = test.get_token ();
3051 ASSERT_EQ (tok->type, CPP_STRING);
3052 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3054 /* We don't support getting the location of a stringified macro
3055 argument. Verify that it fails gracefully. */
3056 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3057 "cpp_interpret_string_1 failed");
3059 tok = test.get_token ();
3060 ASSERT_EQ (tok->type, CPP_PADDING);
3062 tok = test.get_token ();
3063 ASSERT_EQ (tok->type, CPP_PADDING);
3066 /* Ensure that we are fail gracefully if something attempts to pass
3067 in a location that isn't a string literal token. Seen on this code:
3069 const char a[] = " %d ";
3070 __builtin_printf (a, 0.5);
3073 when c-format.c erroneously used the indicated one-character
3074 location as the format string location, leading to a read past the
3075 end of a string buffer in cpp_interpret_string_1. */
3077 static void
3078 test_lexer_string_locations_non_string (const line_table_case &case_)
3080 /* .....................000000000111111111122222222223.
3081 .....................123456789012345678901234567890. */
3082 const char *content = (" a\n");
3083 lexer_test test (case_, content, NULL);
3085 /* Verify that we get the expected token back. */
3086 const cpp_token *tok = test.get_token ();
3087 ASSERT_EQ (tok->type, CPP_NAME);
3088 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3090 /* At this point, libcpp is attempting to interpret the name as a
3091 string literal, despite it not starting with a quote. We don't detect
3092 that, but we should at least fail gracefully. */
3093 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3094 "cpp_interpret_string_1 failed");
3097 /* Ensure that we can read substring information for a token which
3098 starts in one linemap and ends in another . Adapted from
3099 gcc.dg/cpp/pr69985.c. */
3101 static void
3102 test_lexer_string_locations_long_line (const line_table_case &case_)
3104 /* .....................000000.000111111111
3105 .....................123456.789012346789. */
3106 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3107 " \"0123456789012345678901234567890123456789"
3108 "0123456789012345678901234567890123456789"
3109 "0123456789012345678901234567890123456789"
3110 "0123456789\"\n");
3112 lexer_test test (case_, content, NULL);
3114 /* Verify that we get the expected token back. */
3115 const cpp_token *tok = test.get_token ();
3116 ASSERT_EQ (tok->type, CPP_STRING);
3118 if (!should_have_column_data_p (line_table->highest_location))
3119 return;
3121 /* Verify ranges of individual characters. */
3122 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3123 for (int i = 0; i < 131; i++)
3124 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3125 i, 2, 7 + i, 7 + i);
3128 /* Test of lexing char constants. */
3130 static void
3131 test_lexer_char_constants (const line_table_case &case_)
3133 /* Various char constants.
3134 .....................0000000001111111111.22222222223.
3135 .....................1234567890123456789.01234567890. */
3136 const char *content = (" 'a'\n"
3137 " u'a'\n"
3138 " U'a'\n"
3139 " L'a'\n"
3140 " 'abc'\n");
3141 lexer_test test (case_, content, NULL);
3143 /* Verify that we get the expected tokens back. */
3144 /* 'a'. */
3145 const cpp_token *tok = test.get_token ();
3146 ASSERT_EQ (tok->type, CPP_CHAR);
3147 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3149 unsigned int chars_seen;
3150 int unsignedp;
3151 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3152 &chars_seen, &unsignedp);
3153 ASSERT_EQ (cc, 'a');
3154 ASSERT_EQ (chars_seen, 1);
3156 /* u'a'. */
3157 tok = test.get_token ();
3158 ASSERT_EQ (tok->type, CPP_CHAR16);
3159 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3161 /* U'a'. */
3162 tok = test.get_token ();
3163 ASSERT_EQ (tok->type, CPP_CHAR32);
3164 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3166 /* L'a'. */
3167 tok = test.get_token ();
3168 ASSERT_EQ (tok->type, CPP_WCHAR);
3169 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3171 /* 'abc' (c-char-sequence). */
3172 tok = test.get_token ();
3173 ASSERT_EQ (tok->type, CPP_CHAR);
3174 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3176 /* A table of interesting location_t values, giving one axis of our test
3177 matrix. */
3179 static const location_t boundary_locations[] = {
3180 /* Zero means "don't override the default values for a new line_table". */
3183 /* An arbitrary non-zero value that isn't close to one of
3184 the boundary values below. */
3185 0x10000,
3187 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3188 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3189 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3190 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3191 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3192 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3194 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3195 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3196 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3197 LINE_MAP_MAX_LOCATION_WITH_COLS,
3198 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3199 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3202 /* Run TESTCASE multiple times, once for each case in our test matrix. */
3204 void
3205 for_each_line_table_case (void (*testcase) (const line_table_case &))
3207 /* As noted above in the description of struct line_table_case,
3208 we want to explore a test matrix of interesting line_table
3209 situations, running various selftests for each case within the
3210 matrix. */
3212 /* Run all tests with:
3213 (a) line_table->default_range_bits == 0, and
3214 (b) line_table->default_range_bits == 5. */
3215 int num_cases_tested = 0;
3216 for (int default_range_bits = 0; default_range_bits <= 5;
3217 default_range_bits += 5)
3219 /* ...and use each of the "interesting" location values as
3220 the starting location within line_table. */
3221 const int num_boundary_locations
3222 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3223 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3225 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3227 testcase (c);
3229 num_cases_tested++;
3233 /* Verify that we fully covered the test matrix. */
3234 ASSERT_EQ (num_cases_tested, 2 * 12);
3237 /* Run all of the selftests within this file. */
3239 void
3240 input_c_tests ()
3242 test_should_have_column_data_p ();
3243 test_unknown_location ();
3244 test_builtins ();
3245 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3247 for_each_line_table_case (test_accessing_ordinary_linemaps);
3248 for_each_line_table_case (test_lexer);
3249 for_each_line_table_case (test_lexer_string_locations_simple);
3250 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3251 for_each_line_table_case (test_lexer_string_locations_hex);
3252 for_each_line_table_case (test_lexer_string_locations_oct);
3253 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3254 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3255 for_each_line_table_case (test_lexer_string_locations_ucn4);
3256 for_each_line_table_case (test_lexer_string_locations_ucn8);
3257 for_each_line_table_case (test_lexer_string_locations_wide_string);
3258 for_each_line_table_case (test_lexer_string_locations_string16);
3259 for_each_line_table_case (test_lexer_string_locations_string32);
3260 for_each_line_table_case (test_lexer_string_locations_u8);
3261 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3262 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3263 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3264 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3265 for_each_line_table_case (test_lexer_string_locations_macro);
3266 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3267 for_each_line_table_case (test_lexer_string_locations_non_string);
3268 for_each_line_table_case (test_lexer_string_locations_long_line);
3269 for_each_line_table_case (test_lexer_char_constants);
3271 test_reading_source_line ();
3274 } // namespace selftest
3276 #endif /* CHECKING_P */