2016-09-10 Bernd Edlinger <bernd.edlinger@hotmail.de>
[official-gcc.git] / gcc / input.c
blob55bff4891ef279e78858c5a3a093823f1ac330d2
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic-core.h"
25 #include "selftest.h"
26 #include "cpplib.h"
28 #ifndef HAVE_ICONV
29 #define HAVE_ICONV 0
30 #endif
32 /* This is a cache used by get_next_line to store the content of a
33 file to be searched for file lines. */
34 struct fcache
36 /* These are information used to store a line boundary. */
37 struct line_info
39 /* The line number. It starts from 1. */
40 size_t line_num;
42 /* The position (byte count) of the beginning of the line,
43 relative to the file data pointer. This starts at zero. */
44 size_t start_pos;
46 /* The position (byte count) of the last byte of the line. This
47 normally points to the '\n' character, or to one byte after the
48 last byte of the file, if the file doesn't contain a '\n'
49 character. */
50 size_t end_pos;
52 line_info (size_t l, size_t s, size_t e)
53 : line_num (l), start_pos (s), end_pos (e)
56 line_info ()
57 :line_num (0), start_pos (0), end_pos (0)
61 /* The number of time this file has been accessed. This is used
62 to designate which file cache to evict from the cache
63 array. */
64 unsigned use_count;
66 const char *file_path;
68 FILE *fp;
70 /* This points to the content of the file that we've read so
71 far. */
72 char *data;
74 /* The size of the DATA array above.*/
75 size_t size;
77 /* The number of bytes read from the underlying file so far. This
78 must be less (or equal) than SIZE above. */
79 size_t nb_read;
81 /* The index of the beginning of the current line. */
82 size_t line_start_idx;
84 /* The number of the previous line read. This starts at 1. Zero
85 means we've read no line so far. */
86 size_t line_num;
88 /* This is the total number of lines of the current file. At the
89 moment, we try to get this information from the line map
90 subsystem. Note that this is just a hint. When using the C++
91 front-end, this hint is correct because the input file is then
92 completely tokenized before parsing starts; so the line map knows
93 the number of lines before compilation really starts. For e.g,
94 the C front-end, it can happen that we start emitting diagnostics
95 before the line map has seen the end of the file. */
96 size_t total_lines;
98 /* Could this file be missing a trailing newline on its final line?
99 Initially true (to cope with empty files), set to true/false
100 as each line is read. */
101 bool missing_trailing_newline;
103 /* This is a record of the beginning and end of the lines we've seen
104 while reading the file. This is useful to avoid walking the data
105 from the beginning when we are asked to read a line that is
106 before LINE_START_IDX above. Note that the maximum size of this
107 record is fcache_line_record_size, so that the memory consumption
108 doesn't explode. We thus scale total_lines down to
109 fcache_line_record_size. */
110 vec<line_info, va_heap> line_record;
112 fcache ();
113 ~fcache ();
116 /* Current position in real source file. */
118 location_t input_location = UNKNOWN_LOCATION;
120 struct line_maps *line_table;
122 /* A stashed copy of "line_table" for use by selftest::line_table_test.
123 This needs to be a global so that it can be a GC root, and thus
124 prevent the stashed copy from being garbage-collected if the GC runs
125 during a line_table_test. */
127 struct line_maps *saved_line_table;
129 static fcache *fcache_tab;
130 static const size_t fcache_tab_size = 16;
131 static const size_t fcache_buffer_size = 4 * 1024;
132 static const size_t fcache_line_record_size = 100;
134 /* Expand the source location LOC into a human readable location. If
135 LOC resolves to a builtin location, the file name of the readable
136 location is set to the string "<built-in>". If EXPANSION_POINT_P is
137 TRUE and LOC is virtual, then it is resolved to the expansion
138 point of the involved macro. Otherwise, it is resolved to the
139 spelling location of the token.
141 When resolving to the spelling location of the token, if the
142 resulting location is for a built-in location (that is, it has no
143 associated line/column) in the context of a macro expansion, the
144 returned location is the first one (while unwinding the macro
145 location towards its expansion point) that is in real source
146 code. */
148 static expanded_location
149 expand_location_1 (source_location loc,
150 bool expansion_point_p)
152 expanded_location xloc;
153 const line_map_ordinary *map;
154 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
155 tree block = NULL;
157 if (IS_ADHOC_LOC (loc))
159 block = LOCATION_BLOCK (loc);
160 loc = LOCATION_LOCUS (loc);
163 memset (&xloc, 0, sizeof (xloc));
165 if (loc >= RESERVED_LOCATION_COUNT)
167 if (!expansion_point_p)
169 /* We want to resolve LOC to its spelling location.
171 But if that spelling location is a reserved location that
172 appears in the context of a macro expansion (like for a
173 location for a built-in token), let's consider the first
174 location (toward the expansion point) that is not reserved;
175 that is, the first location that is in real source code. */
176 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
177 loc, NULL);
178 lrk = LRK_SPELLING_LOCATION;
180 loc = linemap_resolve_location (line_table, loc,
181 lrk, &map);
182 xloc = linemap_expand_location (line_table, map, loc);
185 xloc.data = block;
186 if (loc <= BUILTINS_LOCATION)
187 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
189 return xloc;
192 /* Initialize the set of cache used for files accessed by caret
193 diagnostic. */
195 static void
196 diagnostic_file_cache_init (void)
198 if (fcache_tab == NULL)
199 fcache_tab = new fcache[fcache_tab_size];
202 /* Free the resources used by the set of cache used for files accessed
203 by caret diagnostic. */
205 void
206 diagnostic_file_cache_fini (void)
208 if (fcache_tab)
210 delete [] (fcache_tab);
211 fcache_tab = NULL;
215 /* Return the total lines number that have been read so far by the
216 line map (in the preprocessor) so far. For languages like C++ that
217 entirely preprocess the input file before starting to parse, this
218 equals the actual number of lines of the file. */
220 static size_t
221 total_lines_num (const char *file_path)
223 size_t r = 0;
224 source_location l = 0;
225 if (linemap_get_file_highest_location (line_table, file_path, &l))
227 gcc_assert (l >= RESERVED_LOCATION_COUNT);
228 expanded_location xloc = expand_location (l);
229 r = xloc.line;
231 return r;
234 /* Lookup the cache used for the content of a given file accessed by
235 caret diagnostic. Return the found cached file, or NULL if no
236 cached file was found. */
238 static fcache*
239 lookup_file_in_cache_tab (const char *file_path)
241 if (file_path == NULL)
242 return NULL;
244 diagnostic_file_cache_init ();
246 /* This will contain the found cached file. */
247 fcache *r = NULL;
248 for (unsigned i = 0; i < fcache_tab_size; ++i)
250 fcache *c = &fcache_tab[i];
251 if (c->file_path && !strcmp (c->file_path, file_path))
253 ++c->use_count;
254 r = c;
258 if (r)
259 ++r->use_count;
261 return r;
264 /* Purge any mention of FILENAME from the cache of files used for
265 printing source code. For use in selftests when working
266 with tempfiles. */
268 void
269 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
271 gcc_assert (file_path);
273 fcache *r = lookup_file_in_cache_tab (file_path);
274 if (!r)
275 /* Not found. */
276 return;
278 r->file_path = NULL;
279 if (r->fp)
280 fclose (r->fp);
281 r->fp = NULL;
282 r->nb_read = 0;
283 r->line_start_idx = 0;
284 r->line_num = 0;
285 r->line_record.truncate (0);
286 r->use_count = 0;
287 r->total_lines = 0;
288 r->missing_trailing_newline = true;
291 /* Return the file cache that has been less used, recently, or the
292 first empty one. If HIGHEST_USE_COUNT is non-null,
293 *HIGHEST_USE_COUNT is set to the highest use count of the entries
294 in the cache table. */
296 static fcache*
297 evicted_cache_tab_entry (unsigned *highest_use_count)
299 diagnostic_file_cache_init ();
301 fcache *to_evict = &fcache_tab[0];
302 unsigned huc = to_evict->use_count;
303 for (unsigned i = 1; i < fcache_tab_size; ++i)
305 fcache *c = &fcache_tab[i];
306 bool c_is_empty = (c->file_path == NULL);
308 if (c->use_count < to_evict->use_count
309 || (to_evict->file_path && c_is_empty))
310 /* We evict C because it's either an entry with a lower use
311 count or one that is empty. */
312 to_evict = c;
314 if (huc < c->use_count)
315 huc = c->use_count;
317 if (c_is_empty)
318 /* We've reached the end of the cache; subsequent elements are
319 all empty. */
320 break;
323 if (highest_use_count)
324 *highest_use_count = huc;
326 return to_evict;
329 /* Create the cache used for the content of a given file to be
330 accessed by caret diagnostic. This cache is added to an array of
331 cache and can be retrieved by lookup_file_in_cache_tab. This
332 function returns the created cache. Note that only the last
333 fcache_tab_size files are cached. */
335 static fcache*
336 add_file_to_cache_tab (const char *file_path)
339 FILE *fp = fopen (file_path, "r");
340 if (fp == NULL)
341 return NULL;
343 unsigned highest_use_count = 0;
344 fcache *r = evicted_cache_tab_entry (&highest_use_count);
345 r->file_path = file_path;
346 if (r->fp)
347 fclose (r->fp);
348 r->fp = fp;
349 r->nb_read = 0;
350 r->line_start_idx = 0;
351 r->line_num = 0;
352 r->line_record.truncate (0);
353 /* Ensure that this cache entry doesn't get evicted next time
354 add_file_to_cache_tab is called. */
355 r->use_count = ++highest_use_count;
356 r->total_lines = total_lines_num (file_path);
357 r->missing_trailing_newline = true;
359 return r;
362 /* Lookup the cache used for the content of a given file accessed by
363 caret diagnostic. If no cached file was found, create a new cache
364 for this file, add it to the array of cached file and return
365 it. */
367 static fcache*
368 lookup_or_add_file_to_cache_tab (const char *file_path)
370 fcache *r = lookup_file_in_cache_tab (file_path);
371 if (r == NULL)
372 r = add_file_to_cache_tab (file_path);
373 return r;
376 /* Default constructor for a cache of file used by caret
377 diagnostic. */
379 fcache::fcache ()
380 : use_count (0), file_path (NULL), fp (NULL), data (0),
381 size (0), nb_read (0), line_start_idx (0), line_num (0),
382 total_lines (0), missing_trailing_newline (true)
384 line_record.create (0);
387 /* Destructor for a cache of file used by caret diagnostic. */
389 fcache::~fcache ()
391 if (fp)
393 fclose (fp);
394 fp = NULL;
396 if (data)
398 XDELETEVEC (data);
399 data = 0;
401 line_record.release ();
404 /* Returns TRUE iff the cache would need to be filled with data coming
405 from the file. That is, either the cache is empty or full or the
406 current line is empty. Note that if the cache is full, it would
407 need to be extended and filled again. */
409 static bool
410 needs_read (fcache *c)
412 return (c->nb_read == 0
413 || c->nb_read == c->size
414 || (c->line_start_idx >= c->nb_read - 1));
417 /* Return TRUE iff the cache is full and thus needs to be
418 extended. */
420 static bool
421 needs_grow (fcache *c)
423 return c->nb_read == c->size;
426 /* Grow the cache if it needs to be extended. */
428 static void
429 maybe_grow (fcache *c)
431 if (!needs_grow (c))
432 return;
434 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
435 c->data = XRESIZEVEC (char, c->data, size + 1);
436 c->size = size;
439 /* Read more data into the cache. Extends the cache if need be.
440 Returns TRUE iff new data could be read. */
442 static bool
443 read_data (fcache *c)
445 if (feof (c->fp) || ferror (c->fp))
446 return false;
448 maybe_grow (c);
450 char * from = c->data + c->nb_read;
451 size_t to_read = c->size - c->nb_read;
452 size_t nb_read = fread (from, 1, to_read, c->fp);
454 if (ferror (c->fp))
455 return false;
457 c->nb_read += nb_read;
458 return !!nb_read;
461 /* Read new data iff the cache needs to be filled with more data
462 coming from the file FP. Return TRUE iff the cache was filled with
463 mode data. */
465 static bool
466 maybe_read_data (fcache *c)
468 if (!needs_read (c))
469 return false;
470 return read_data (c);
473 /* Read a new line from file FP, using C as a cache for the data
474 coming from the file. Upon successful completion, *LINE is set to
475 the beginning of the line found. Space for that line has been
476 allocated in the cache thus *LINE has the same life time as C.
477 *LINE_LEN is set to the length of the line. Note that the line
478 does not contain any terminal delimiter. This function returns
479 true if some data was read or process from the cache, false
480 otherwise. Note that subsequent calls to get_next_line return the
481 next lines of the file and might overwrite the content of
482 *LINE. */
484 static bool
485 get_next_line (fcache *c, char **line, ssize_t *line_len)
487 /* Fill the cache with data to process. */
488 maybe_read_data (c);
490 size_t remaining_size = c->nb_read - c->line_start_idx;
491 if (remaining_size == 0)
492 /* There is no more data to process. */
493 return false;
495 char *line_start = c->data + c->line_start_idx;
497 char *next_line_start = NULL;
498 size_t len = 0;
499 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
500 if (line_end == NULL)
502 /* We haven't found the end-of-line delimiter in the cache.
503 Fill the cache with more data from the file and look for the
504 '\n'. */
505 while (maybe_read_data (c))
507 line_start = c->data + c->line_start_idx;
508 remaining_size = c->nb_read - c->line_start_idx;
509 line_end = (char *) memchr (line_start, '\n', remaining_size);
510 if (line_end != NULL)
512 next_line_start = line_end + 1;
513 break;
516 if (line_end == NULL)
518 /* We've loadded all the file into the cache and still no
519 '\n'. Let's say the line ends up at one byte passed the
520 end of the file. This is to stay consistent with the case
521 of when the line ends up with a '\n' and line_end points to
522 that terminal '\n'. That consistency is useful below in
523 the len calculation. */
524 line_end = c->data + c->nb_read ;
525 c->missing_trailing_newline = true;
527 else
528 c->missing_trailing_newline = false;
530 else
532 next_line_start = line_end + 1;
533 c->missing_trailing_newline = false;
536 if (ferror (c->fp))
537 return -1;
539 /* At this point, we've found the end of the of line. It either
540 points to the '\n' or to one byte after the last byte of the
541 file. */
542 gcc_assert (line_end != NULL);
544 len = line_end - line_start;
546 if (c->line_start_idx < c->nb_read)
547 *line = line_start;
549 ++c->line_num;
551 /* Before we update our line record, make sure the hint about the
552 total number of lines of the file is correct. If it's not, then
553 we give up recording line boundaries from now on. */
554 bool update_line_record = true;
555 if (c->line_num > c->total_lines)
556 update_line_record = false;
558 /* Now update our line record so that re-reading lines from the
559 before c->line_start_idx is faster. */
560 if (update_line_record
561 && c->line_record.length () < fcache_line_record_size)
563 /* If the file lines fits in the line record, we just record all
564 its lines ...*/
565 if (c->total_lines <= fcache_line_record_size
566 && c->line_num > c->line_record.length ())
567 c->line_record.safe_push (fcache::line_info (c->line_num,
568 c->line_start_idx,
569 line_end - c->data));
570 else if (c->total_lines > fcache_line_record_size)
572 /* ... otherwise, we just scale total_lines down to
573 (fcache_line_record_size lines. */
574 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
575 if (c->line_record.length () == 0
576 || n >= c->line_record.length ())
577 c->line_record.safe_push (fcache::line_info (c->line_num,
578 c->line_start_idx,
579 line_end - c->data));
583 /* Update c->line_start_idx so that it points to the next line to be
584 read. */
585 if (next_line_start)
586 c->line_start_idx = next_line_start - c->data;
587 else
588 /* We didn't find any terminal '\n'. Let's consider that the end
589 of line is the end of the data in the cache. The next
590 invocation of get_next_line will either read more data from the
591 underlying file or return false early because we've reached the
592 end of the file. */
593 c->line_start_idx = c->nb_read;
595 *line_len = len;
597 return true;
600 /* Reads the next line from FILE into *LINE. If *LINE is too small
601 (or NULL) it is allocated (or extended) to have enough space to
602 containe the line. *LINE_LENGTH must contain the size of the
603 initial*LINE buffer. It's then updated by this function to the
604 actual length of the returned line. Note that the returned line
605 can contain several zero bytes. Also note that the returned string
606 is allocated in static storage that is going to be re-used by
607 subsequent invocations of read_line. */
609 static bool
610 read_next_line (fcache *cache, char ** line, ssize_t *line_len)
612 char *l = NULL;
613 ssize_t len = 0;
615 if (!get_next_line (cache, &l, &len))
616 return false;
618 if (*line == NULL)
619 *line = XNEWVEC (char, len);
620 else
621 if (*line_len < len)
622 *line = XRESIZEVEC (char, *line, len);
624 memcpy (*line, l, len);
625 *line_len = len;
627 return true;
630 /* Consume the next bytes coming from the cache (or from its
631 underlying file if there are remaining unread bytes in the file)
632 until we reach the next end-of-line (or end-of-file). There is no
633 copying from the cache involved. Return TRUE upon successful
634 completion. */
636 static bool
637 goto_next_line (fcache *cache)
639 char *l;
640 ssize_t len;
642 return get_next_line (cache, &l, &len);
645 /* Read an arbitrary line number LINE_NUM from the file cached in C.
646 The line is copied into *LINE. *LINE_LEN must have been set to the
647 length of *LINE. If *LINE is too small (or NULL) it's extended (or
648 allocated) and *LINE_LEN is adjusted accordingly. *LINE ends up
649 with a terminal zero byte and can contain additional zero bytes.
650 This function returns bool if a line was read. */
652 static bool
653 read_line_num (fcache *c, size_t line_num,
654 char ** line, ssize_t *line_len)
656 gcc_assert (line_num > 0);
658 if (line_num <= c->line_num)
660 /* We've been asked to read lines that are before c->line_num.
661 So lets use our line record (if it's not empty) to try to
662 avoid re-reading the file from the beginning again. */
664 if (c->line_record.is_empty ())
666 c->line_start_idx = 0;
667 c->line_num = 0;
669 else
671 fcache::line_info *i = NULL;
672 if (c->total_lines <= fcache_line_record_size)
674 /* In languages where the input file is not totally
675 preprocessed up front, the c->total_lines hint
676 can be smaller than the number of lines of the
677 file. In that case, only the first
678 c->total_lines have been recorded.
680 Otherwise, the first c->total_lines we've read have
681 their start/end recorded here. */
682 i = (line_num <= c->total_lines)
683 ? &c->line_record[line_num - 1]
684 : &c->line_record[c->total_lines - 1];
685 gcc_assert (i->line_num <= line_num);
687 else
689 /* So the file had more lines than our line record
690 size. Thus the number of lines we've recorded has
691 been scaled down to fcache_line_reacord_size. Let's
692 pick the start/end of the recorded line that is
693 closest to line_num. */
694 size_t n = (line_num <= c->total_lines)
695 ? line_num * fcache_line_record_size / c->total_lines
696 : c ->line_record.length () - 1;
697 if (n < c->line_record.length ())
699 i = &c->line_record[n];
700 gcc_assert (i->line_num <= line_num);
704 if (i && i->line_num == line_num)
706 /* We have the start/end of the line. Let's just copy
707 it again and we are done. */
708 ssize_t len = i->end_pos - i->start_pos + 1;
709 if (*line_len < len)
710 *line = XRESIZEVEC (char, *line, len);
711 memmove (*line, c->data + i->start_pos, len);
712 (*line)[len - 1] = '\0';
713 *line_len = --len;
714 return true;
717 if (i)
719 c->line_start_idx = i->start_pos;
720 c->line_num = i->line_num - 1;
722 else
724 c->line_start_idx = 0;
725 c->line_num = 0;
730 /* Let's walk from line c->line_num up to line_num - 1, without
731 copying any line. */
732 while (c->line_num < line_num - 1)
733 if (!goto_next_line (c))
734 return false;
736 /* The line we want is the next one. Let's read and copy it back to
737 the caller. */
738 return read_next_line (c, line, line_len);
741 /* Return the physical source line that corresponds to FILE_PATH/LINE in a
742 buffer that is statically allocated. The newline is replaced by
743 the null character. Note that the line can contain several null
744 characters, so LINE_LEN, if non-null, points to the actual length
745 of the line. */
747 const char *
748 location_get_source_line (const char *file_path, int line,
749 int *line_len)
751 static char *buffer;
752 static ssize_t len;
754 if (line == 0)
755 return NULL;
757 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
758 if (c == NULL)
759 return NULL;
761 bool read = read_line_num (c, line, &buffer, &len);
763 if (read && line_len)
764 *line_len = len;
766 return read ? buffer : NULL;
769 /* Determine if FILE_PATH missing a trailing newline on its final line.
770 Only valid to call once all of the file has been loaded, by
771 requesting a line number beyond the end of the file. */
773 bool
774 location_missing_trailing_newline (const char *file_path)
776 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
777 if (c == NULL)
778 return false;
780 return c->missing_trailing_newline;
783 /* Test if the location originates from the spelling location of a
784 builtin-tokens. That is, return TRUE if LOC is a (possibly
785 virtual) location of a built-in token that appears in the expansion
786 list of a macro. Please note that this function also works on
787 tokens that result from built-in tokens. For instance, the
788 function would return true if passed a token "4" that is the result
789 of the expansion of the built-in __LINE__ macro. */
790 bool
791 is_location_from_builtin_token (source_location loc)
793 const line_map_ordinary *map = NULL;
794 loc = linemap_resolve_location (line_table, loc,
795 LRK_SPELLING_LOCATION, &map);
796 return loc == BUILTINS_LOCATION;
799 /* Expand the source location LOC into a human readable location. If
800 LOC is virtual, it resolves to the expansion point of the involved
801 macro. If LOC resolves to a builtin location, the file name of the
802 readable location is set to the string "<built-in>". */
804 expanded_location
805 expand_location (source_location loc)
807 return expand_location_1 (loc, /*expansion_point_p=*/true);
810 /* Expand the source location LOC into a human readable location. If
811 LOC is virtual, it resolves to the expansion location of the
812 relevant macro. If LOC resolves to a builtin location, the file
813 name of the readable location is set to the string
814 "<built-in>". */
816 expanded_location
817 expand_location_to_spelling_point (source_location loc)
819 return expand_location_1 (loc, /*expansion_point_p=*/false);
822 /* The rich_location class within libcpp requires a way to expand
823 source_location instances, and relies on the client code
824 providing a symbol named
825 linemap_client_expand_location_to_spelling_point
826 to do this.
828 This is the implementation for libcommon.a (all host binaries),
829 which simply calls into expand_location_to_spelling_point. */
831 expanded_location
832 linemap_client_expand_location_to_spelling_point (source_location loc)
834 return expand_location_to_spelling_point (loc);
838 /* If LOCATION is in a system header and if it is a virtual location for
839 a token coming from the expansion of a macro, unwind it to the
840 location of the expansion point of the macro. Otherwise, just return
841 LOCATION.
843 This is used for instance when we want to emit diagnostics about a
844 token that may be located in a macro that is itself defined in a
845 system header, for example, for the NULL macro. In such a case, if
846 LOCATION were passed directly to diagnostic functions such as
847 warning_at, the diagnostic would be suppressed (unless
848 -Wsystem-headers). */
850 source_location
851 expansion_point_location_if_in_system_header (source_location location)
853 if (in_system_header_at (location))
854 location = linemap_resolve_location (line_table, location,
855 LRK_MACRO_EXPANSION_POINT,
856 NULL);
857 return location;
860 /* If LOCATION is a virtual location for a token coming from the expansion
861 of a macro, unwind to the location of the expansion point of the macro. */
863 source_location
864 expansion_point_location (source_location location)
866 return linemap_resolve_location (line_table, location,
867 LRK_MACRO_EXPANSION_POINT, NULL);
870 /* Construct a location with caret at CARET, ranging from START to
871 finish e.g.
873 11111111112
874 12345678901234567890
876 523 return foo + bar;
877 ~~~~^~~~~
880 The location's caret is at the "+", line 523 column 15, but starts
881 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
882 of "bar" at column 19. */
884 location_t
885 make_location (location_t caret, location_t start, location_t finish)
887 location_t pure_loc = get_pure_location (caret);
888 source_range src_range;
889 src_range.m_start = get_start (start);
890 src_range.m_finish = get_finish (finish);
891 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
892 pure_loc,
893 src_range,
894 NULL);
895 return combined_loc;
#define ONE_K 1024
#define ONE_M (ONE_K * ONE_K)

/* Scale a number for display.  The result is:
   - the number itself, if it is lower than 10 K (in base 2)
   - the number divided by 1024, if it is >= 10 K but lower than
     10 M (in base 2)
   - the number divided by 1024 * 1024, if it is >= 10 M (in base 2)
   The result is converted to unsigned long.  */
#define SCALE(x) ((unsigned long) ((x) >= 10 * ONE_M \
                                   ? (x) / ONE_M \
                                   : ((x) >= 10 * ONE_K \
                                      ? (x) / ONE_K \
                                      : (x))))

/* Give the unit that goes with SCALE for a given integer:
   - the character ' ' if the number is strictly lower than 10 K
   - the character 'k' if the number is >= 10 K (in base 2) but
     strictly lower than 10 M (in base 2)
   - the character 'M' if the number is >= 10 M (in base 2)  */
#define STAT_LABEL(x) ((x) >= 10 * ONE_M ? 'M' : ((x) >= 10 * ONE_K ? 'k' : ' '))

/* Expand to two arguments for a printf-like function: SIZE scaled to
   a multiple of 1K or 1M (in base 2), followed by the matching unit
   character (k, M, or ' ').  */
#define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
923 /* Dump statistics to stderr about the memory usage of the line_table
924 set of line maps. This also displays some statistics about macro
925 expansion. */
927 void
928 dump_line_table_statistics (void)
930 struct linemap_stats s;
931 long total_used_map_size,
932 macro_maps_size,
933 total_allocated_map_size;
935 memset (&s, 0, sizeof (s));
937 linemap_get_statistics (line_table, &s);
939 macro_maps_size = s.macro_maps_used_size
940 + s.macro_maps_locations_size;
942 total_allocated_map_size = s.ordinary_maps_allocated_size
943 + s.macro_maps_allocated_size
944 + s.macro_maps_locations_size;
946 total_used_map_size = s.ordinary_maps_used_size
947 + s.macro_maps_used_size
948 + s.macro_maps_locations_size;
950 fprintf (stderr, "Number of expanded macros: %5ld\n",
951 s.num_expanded_macros);
952 if (s.num_expanded_macros != 0)
953 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
954 s.num_macro_tokens / s.num_expanded_macros);
955 fprintf (stderr,
956 "\nLine Table allocations during the "
957 "compilation process\n");
958 fprintf (stderr, "Number of ordinary maps used: %5ld%c\n",
959 SCALE (s.num_ordinary_maps_used),
960 STAT_LABEL (s.num_ordinary_maps_used));
961 fprintf (stderr, "Ordinary map used size: %5ld%c\n",
962 SCALE (s.ordinary_maps_used_size),
963 STAT_LABEL (s.ordinary_maps_used_size));
964 fprintf (stderr, "Number of ordinary maps allocated: %5ld%c\n",
965 SCALE (s.num_ordinary_maps_allocated),
966 STAT_LABEL (s.num_ordinary_maps_allocated));
967 fprintf (stderr, "Ordinary maps allocated size: %5ld%c\n",
968 SCALE (s.ordinary_maps_allocated_size),
969 STAT_LABEL (s.ordinary_maps_allocated_size));
970 fprintf (stderr, "Number of macro maps used: %5ld%c\n",
971 SCALE (s.num_macro_maps_used),
972 STAT_LABEL (s.num_macro_maps_used));
973 fprintf (stderr, "Macro maps used size: %5ld%c\n",
974 SCALE (s.macro_maps_used_size),
975 STAT_LABEL (s.macro_maps_used_size));
976 fprintf (stderr, "Macro maps locations size: %5ld%c\n",
977 SCALE (s.macro_maps_locations_size),
978 STAT_LABEL (s.macro_maps_locations_size));
979 fprintf (stderr, "Macro maps size: %5ld%c\n",
980 SCALE (macro_maps_size),
981 STAT_LABEL (macro_maps_size));
982 fprintf (stderr, "Duplicated maps locations size: %5ld%c\n",
983 SCALE (s.duplicated_macro_maps_locations_size),
984 STAT_LABEL (s.duplicated_macro_maps_locations_size));
985 fprintf (stderr, "Total allocated maps size: %5ld%c\n",
986 SCALE (total_allocated_map_size),
987 STAT_LABEL (total_allocated_map_size));
988 fprintf (stderr, "Total used maps size: %5ld%c\n",
989 SCALE (total_used_map_size),
990 STAT_LABEL (total_used_map_size));
991 fprintf (stderr, "Ad-hoc table size: %5ld%c\n",
992 SCALE (s.adhoc_table_size),
993 STAT_LABEL (s.adhoc_table_size));
994 fprintf (stderr, "Ad-hoc table entries used: %5ld\n",
995 s.adhoc_table_entries_used);
996 fprintf (stderr, "optimized_ranges: %i\n",
997 line_table->num_optimized_ranges);
998 fprintf (stderr, "unoptimized_ranges: %i\n",
999 line_table->num_unoptimized_ranges);
1001 fprintf (stderr, "\n");
1004 /* Get location one beyond the final location in ordinary map IDX. */
1006 static source_location
1007 get_end_location (struct line_maps *set, unsigned int idx)
1009 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1010 return set->highest_location;
1012 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1013 return MAP_START_LOCATION (next_map);
/* Helper function for write_digit_row.  Write to STREAM the
   character for the last decimal digit of DIGIT.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1024 /* Helper function for dump_location_info.
1025 Write a row of numbers to STREAM, numbering a source line,
1026 giving the units, tens, hundreds etc of the column number. */
1028 static void
1029 write_digit_row (FILE *stream, int indent,
1030 const line_map_ordinary *map,
1031 source_location loc, int max_col, int divisor)
1033 fprintf (stream, "%*c", indent, ' ');
1034 fprintf (stream, "|");
1035 for (int column = 1; column < max_col; column++)
1037 source_location column_loc = loc + (column << map->m_range_bits);
1038 write_digit (stream, column_loc / divisor);
1040 fprintf (stream, "\n");
1043 /* Write a half-closed (START) / half-open (END) interval of
1044 source_location to STREAM. */
1046 static void
1047 dump_location_range (FILE *stream,
1048 source_location start, source_location end)
1050 fprintf (stream,
1051 " source_location interval: %u <= loc < %u\n",
1052 start, end);
1055 /* Write a labelled description of a half-closed (START) / half-open (END)
1056 interval of source_location to STREAM. */
1058 static void
1059 dump_labelled_location_range (FILE *stream,
1060 const char *name,
1061 source_location start, source_location end)
1063 fprintf (stream, "%s\n", name);
1064 dump_location_range (stream, start, end);
1065 fprintf (stream, "\n");
1068 /* Write a visualization of the locations in the line_table to STREAM. */
1070 void
1071 dump_location_info (FILE *stream)
1073 /* Visualize the reserved locations. */
1074 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1075 0, RESERVED_LOCATION_COUNT);
1077 /* Visualize the ordinary line_map instances, rendering the sources. */
1078 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1080 source_location end_location = get_end_location (line_table, idx);
1081 /* half-closed: doesn't include this one. */
1083 const line_map_ordinary *map
1084 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1085 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1086 dump_location_range (stream,
1087 MAP_START_LOCATION (map), end_location);
1088 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1089 fprintf (stream, " starting at line: %i\n",
1090 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1091 fprintf (stream, " column and range bits: %i\n",
1092 map->m_column_and_range_bits);
1093 fprintf (stream, " column bits: %i\n",
1094 map->m_column_and_range_bits - map->m_range_bits);
1095 fprintf (stream, " range bits: %i\n",
1096 map->m_range_bits);
1098 /* Render the span of source lines that this "map" covers. */
1099 for (source_location loc = MAP_START_LOCATION (map);
1100 loc < end_location;
1101 loc += (1 << map->m_range_bits) )
1103 gcc_assert (pure_location_p (line_table, loc) );
1105 expanded_location exploc
1106 = linemap_expand_location (line_table, map, loc);
1108 if (0 == exploc.column)
1110 /* Beginning of a new source line: draw the line. */
1112 int line_size;
1113 const char *line_text = location_get_source_line (exploc.file,
1114 exploc.line,
1115 &line_size);
1116 if (!line_text)
1117 break;
1118 fprintf (stream,
1119 "%s:%3i|loc:%5i|%.*s\n",
1120 exploc.file, exploc.line,
1121 loc,
1122 line_size, line_text);
1124 /* "loc" is at column 0, which means "the whole line".
1125 Render the locations *within* the line, by underlining
1126 it, showing the source_location numeric values
1127 at each column. */
1128 int max_col = (1 << map->m_column_and_range_bits) - 1;
1129 if (max_col > line_size)
1130 max_col = line_size + 1;
1132 int indent = 14 + strlen (exploc.file);
1134 /* Thousands. */
1135 if (end_location > 999)
1136 write_digit_row (stream, indent, map, loc, max_col, 1000);
1138 /* Hundreds. */
1139 if (end_location > 99)
1140 write_digit_row (stream, indent, map, loc, max_col, 100);
1142 /* Tens. */
1143 write_digit_row (stream, indent, map, loc, max_col, 10);
1145 /* Units. */
1146 write_digit_row (stream, indent, map, loc, max_col, 1);
1149 fprintf (stream, "\n");
1152 /* Visualize unallocated values. */
1153 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1154 line_table->highest_location,
1155 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1157 /* Visualize the macro line_map instances, rendering the sources. */
1158 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1160 /* Each macro map that is allocated owns source_location values
1161 that are *lower* that the one before them.
1162 Hence it's meaningful to view them either in order of ascending
1163 source locations, or in order of ascending macro map index. */
1164 const bool ascending_source_locations = true;
1165 unsigned int idx = (ascending_source_locations
1166 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1167 : i);
1168 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1169 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1170 idx,
1171 linemap_map_get_macro_name (map),
1172 MACRO_MAP_NUM_MACRO_TOKENS (map));
1173 dump_location_range (stream,
1174 map->start_location,
1175 (map->start_location
1176 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1177 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1178 "expansion point is location %i",
1179 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1180 fprintf (stream, " map->start_location: %u\n",
1181 map->start_location);
1183 fprintf (stream, " macro_locations:\n");
1184 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1186 source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1187 source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1189 /* linemap_add_macro_token encodes token numbers in an expansion
1190 by putting them after MAP_START_LOCATION. */
1192 /* I'm typically seeing 4 uninitialized entries at the end of
1193 0xafafafaf.
1194 This appears to be due to macro.c:replace_args
1195 adding 2 extra args for padding tokens; presumably there may
1196 be a leading and/or trailing padding token injected,
1197 each for 2 more location slots.
1198 This would explain there being up to 4 source_locations slots
1199 that may be uninitialized. */
1201 fprintf (stream, " %u: %u, %u\n",
1205 if (x == y)
1207 if (x < MAP_START_LOCATION (map))
1208 inform (x, "token %u has x-location == y-location == %u", i, x);
1209 else
1210 fprintf (stream,
1211 "x-location == y-location == %u encodes token # %u\n",
1212 x, x - MAP_START_LOCATION (map));
1214 else
1216 inform (x, "token %u has x-location == %u", i, x);
1217 inform (x, "token %u has y-location == %u", i, y);
1220 fprintf (stream, "\n");
1223 /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1224 macro map, presumably due to an off-by-one error somewhere
1225 between the logic in linemap_enter_macro and
1226 LINEMAPS_MACRO_LOWEST_LOCATION. */
1227 dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1228 MAX_SOURCE_LOCATION,
1229 MAX_SOURCE_LOCATION + 1);
1231 /* Visualize ad-hoc values. */
1232 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1233 MAX_SOURCE_LOCATION + 1, UINT_MAX);
1236 /* string_concat's constructor. */
1238 string_concat::string_concat (int num, location_t *locs)
1239 : m_num (num)
1241 m_locs = ggc_vec_alloc <location_t> (num);
1242 for (int i = 0; i < num; i++)
1243 m_locs[i] = locs[i];
1246 /* string_concat_db's constructor. */
1248 string_concat_db::string_concat_db ()
1250 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1253 /* Record that a string concatenation occurred, covering NUM
1254 string literal tokens. LOCS is an array of size NUM, containing the
1255 locations of the tokens. A copy of LOCS is taken. */
1257 void
1258 string_concat_db::record_string_concatenation (int num, location_t *locs)
1260 gcc_assert (num > 1);
1261 gcc_assert (locs);
1263 location_t key_loc = get_key_loc (locs[0]);
1265 string_concat *concat
1266 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1267 m_table->put (key_loc, concat);
1270 /* Determine if LOC was the location of the the initial token of a
1271 concatenation of string literal tokens.
1272 If so, *OUT_NUM is written to with the number of tokens, and
1273 *OUT_LOCS with the location of an array of locations of the
1274 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1275 storage owned by the string_concat_db.
1276 Otherwise, return false. */
1278 bool
1279 string_concat_db::get_string_concatenation (location_t loc,
1280 int *out_num,
1281 location_t **out_locs)
1283 gcc_assert (out_num);
1284 gcc_assert (out_locs);
1286 location_t key_loc = get_key_loc (loc);
1288 string_concat **concat = m_table->get (key_loc);
1289 if (!concat)
1290 return false;
1292 *out_num = (*concat)->m_num;
1293 *out_locs =(*concat)->m_locs;
1294 return true;
1297 /* Internal function. Canonicalize LOC into a form suitable for
1298 use as a key within the database, stripping away macro expansion,
1299 ad-hoc information, and range information, using the location of
1300 the start of LOC within an ordinary linemap. */
1302 location_t
1303 string_concat_db::get_key_loc (location_t loc)
1305 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1306 NULL);
1308 loc = get_range_from_loc (line_table, loc).m_start;
1310 return loc;
1313 /* Helper class for use within get_substring_ranges_for_loc.
1314 An vec of cpp_string with responsibility for releasing all of the
1315 str->text for each str in the vector. */
1317 class auto_cpp_string_vec : public auto_vec <cpp_string>
1319 public:
1320 auto_cpp_string_vec (int alloc)
1321 : auto_vec <cpp_string> (alloc) {}
1323 ~auto_cpp_string_vec ()
1325 /* Clean up the copies within this vec. */
1326 int i;
1327 cpp_string *str;
1328 FOR_EACH_VEC_ELT (*this, i, str)
1329 free (const_cast <unsigned char *> (str->text));
1333 /* Attempt to populate RANGES with source location information on the
1334 individual characters within the string literal found at STRLOC.
1335 If CONCATS is non-NULL, then any string literals that the token at
1336 STRLOC was concatenated with are also added to RANGES.
1338 Return NULL if successful, or an error message if any errors occurred (in
1339 which case RANGES may be only partially populated and should not
1340 be used).
1342 This is implemented by re-parsing the relevant source line(s). */
1344 static const char *
1345 get_substring_ranges_for_loc (cpp_reader *pfile,
1346 string_concat_db *concats,
1347 location_t strloc,
1348 enum cpp_ttype type,
1349 cpp_substring_ranges &ranges)
1351 gcc_assert (pfile);
1353 if (strloc == UNKNOWN_LOCATION)
1354 return "unknown location";
1356 /* If string concatenation has occurred at STRLOC, get the locations
1357 of all of the literal tokens making up the compound string.
1358 Otherwise, just use STRLOC. */
1359 int num_locs = 1;
1360 location_t *strlocs = &strloc;
1361 if (concats)
1362 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1364 auto_cpp_string_vec strs (num_locs);
1365 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1366 for (int i = 0; i < num_locs; i++)
1368 /* Get range of strloc. We will use it to locate the start and finish
1369 of the literal token within the line. */
1370 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1372 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1373 /* If the string is within a macro expansion, we can't get at the
1374 end location. */
1375 return "macro expansion";
1377 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1378 /* If so, we can't reliably determine where the token started within
1379 its line. */
1380 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1382 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1383 /* If so, we can't reliably determine where the token finished within
1384 its line. */
1385 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1387 expanded_location start
1388 = expand_location_to_spelling_point (src_range.m_start);
1389 expanded_location finish
1390 = expand_location_to_spelling_point (src_range.m_finish);
1391 if (start.file != finish.file)
1392 return "range endpoints are in different files";
1393 if (start.line != finish.line)
1394 return "range endpoints are on different lines";
1395 if (start.column > finish.column)
1396 return "range endpoints are reversed";
1398 int line_width;
1399 const char *line = location_get_source_line (start.file, start.line,
1400 &line_width);
1401 if (line == NULL)
1402 return "unable to read source line";
1404 /* Determine the location of the literal (including quotes
1405 and leading prefix chars, such as the 'u' in a u""
1406 token). */
1407 const char *literal = line + start.column - 1;
1408 int literal_length = finish.column - start.column + 1;
1410 gcc_assert (line_width >= (start.column - 1 + literal_length));
1411 cpp_string from;
1412 from.len = literal_length;
1413 /* Make a copy of the literal, to avoid having to rely on
1414 the lifetime of the copy of the line within the cache.
1415 This will be released by the auto_cpp_string_vec dtor. */
1416 from.text = XDUPVEC (unsigned char, literal, literal_length);
1417 strs.safe_push (from);
1419 /* For very long lines, a new linemap could have started
1420 halfway through the token.
1421 Ensure that the loc_reader uses the linemap of the
1422 *end* of the token for its start location. */
1423 const line_map_ordinary *final_ord_map;
1424 linemap_resolve_location (line_table, src_range.m_finish,
1425 LRK_MACRO_EXPANSION_POINT, &final_ord_map);
1426 location_t start_loc
1427 = linemap_position_for_line_and_column (line_table, final_ord_map,
1428 start.line, start.column);
1430 cpp_string_location_reader loc_reader (start_loc, line_table);
1431 loc_readers.safe_push (loc_reader);
1434 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1435 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1436 loc_readers.address (),
1437 num_locs, &ranges, type);
1438 if (err)
1439 return err;
1441 /* Success: "ranges" should now contain information on the string. */
1442 return NULL;
1445 /* Attempt to populate *OUT_LOC with source location information on the
1446 given characters within the string literal found at STRLOC.
1447 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1448 character set.
1450 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1451 and string literal "012345\n789"
1452 *OUT_LOC is written to with:
1453 "012345\n789"
1454 ~^~~~~
1456 If CONCATS is non-NULL, then any string literals that the token at
1457 STRLOC was concatenated with are also considered.
1459 This is implemented by re-parsing the relevant source line(s).
1461 Return NULL if successful, or an error message if any errors occurred.
1462 Error messages are intended for GCC developers (to help debugging) rather
1463 than for end-users. */
1465 const char *
1466 get_source_location_for_substring (cpp_reader *pfile,
1467 string_concat_db *concats,
1468 location_t strloc,
1469 enum cpp_ttype type,
1470 int caret_idx, int start_idx, int end_idx,
1471 source_location *out_loc)
1473 gcc_checking_assert (caret_idx >= 0);
1474 gcc_checking_assert (start_idx >= 0);
1475 gcc_checking_assert (end_idx >= 0);
1476 gcc_assert (out_loc);
1478 cpp_substring_ranges ranges;
1479 const char *err
1480 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1481 if (err)
1482 return err;
1484 if (caret_idx >= ranges.get_num_ranges ())
1485 return "caret_idx out of range";
1486 if (start_idx >= ranges.get_num_ranges ())
1487 return "start_idx out of range";
1488 if (end_idx >= ranges.get_num_ranges ())
1489 return "end_idx out of range";
1491 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1492 ranges.get_range (start_idx).m_start,
1493 ranges.get_range (end_idx).m_finish);
1494 return NULL;
1497 #if CHECKING_P
1499 namespace selftest {
1501 /* Selftests of location handling. */
1503 /* Attempt to populate *OUT_RANGE with source location information on the
1504 given character within the string literal found at STRLOC.
1505 CHAR_IDX refers to an offset within the execution character set.
1506 If CONCATS is non-NULL, then any string literals that the token at
1507 STRLOC was concatenated with are also considered.
1509 This is implemented by re-parsing the relevant source line(s).
1511 Return NULL if successful, or an error message if any errors occurred.
1512 Error messages are intended for GCC developers (to help debugging) rather
1513 than for end-users. */
1515 static const char *
1516 get_source_range_for_char (cpp_reader *pfile,
1517 string_concat_db *concats,
1518 location_t strloc,
1519 enum cpp_ttype type,
1520 int char_idx,
1521 source_range *out_range)
1523 gcc_checking_assert (char_idx >= 0);
1524 gcc_assert (out_range);
1526 cpp_substring_ranges ranges;
1527 const char *err
1528 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1529 if (err)
1530 return err;
1532 if (char_idx >= ranges.get_num_ranges ())
1533 return "char_idx out of range";
1535 *out_range = ranges.get_range (char_idx);
1536 return NULL;
1539 /* As get_source_range_for_char, but write to *OUT the number
1540 of ranges that are available. */
1542 static const char *
1543 get_num_source_ranges_for_substring (cpp_reader *pfile,
1544 string_concat_db *concats,
1545 location_t strloc,
1546 enum cpp_ttype type,
1547 int *out)
1549 gcc_assert (out);
1551 cpp_substring_ranges ranges;
1552 const char *err
1553 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1555 if (err)
1556 return err;
1558 *out = ranges.get_num_ranges ();
1559 return NULL;
1562 /* Selftests of location handling. */
1564 /* Helper function for verifying location data: when location_t
1565 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1566 as having column 0. */
1568 static bool
1569 should_have_column_data_p (location_t loc)
1571 if (IS_ADHOC_LOC (loc))
1572 loc = get_location_from_adhoc_loc (line_table, loc);
1573 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1574 return false;
1575 return true;
1578 /* Selftest for should_have_column_data_p. */
1580 static void
1581 test_should_have_column_data_p ()
1583 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1584 ASSERT_TRUE
1585 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1586 ASSERT_FALSE
1587 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1590 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1591 on LOC. */
1593 static void
1594 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1595 location_t loc)
1597 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1598 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1599 /* If location_t values are sufficiently high, then column numbers
1600 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1601 When close to the threshold, column numbers *may* be present: if
1602 the final linemap before the threshold contains a line that straddles
1603 the threshold, locations in that line have column information. */
1604 if (should_have_column_data_p (loc))
1605 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for source_location/location_t beyond which line-map.c changes
     behavior (disabling of the range-packing optimization, disabling
     of column-tracking).  We can exercise these by starting the line_table
     at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix.  */

struct line_table_case
{
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* Value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the value at which to start the line_table.  */
  int m_base_location;
};
1635 /* Constructor. Store the old value of line_table, and create a new
1636 one, using sane defaults. */
1638 line_table_test::line_table_test ()
1640 gcc_assert (saved_line_table == NULL);
1641 saved_line_table = line_table;
1642 line_table = ggc_alloc<line_maps> ();
1643 linemap_init (line_table, BUILTINS_LOCATION);
1644 gcc_assert (saved_line_table->reallocator);
1645 line_table->reallocator = saved_line_table->reallocator;
1646 gcc_assert (saved_line_table->round_alloc_size);
1647 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1648 line_table->default_range_bits = 0;
1651 /* Constructor. Store the old value of line_table, and create a new
1652 one, using the sitation described in CASE_. */
1654 line_table_test::line_table_test (const line_table_case &case_)
1656 gcc_assert (saved_line_table == NULL);
1657 saved_line_table = line_table;
1658 line_table = ggc_alloc<line_maps> ();
1659 linemap_init (line_table, BUILTINS_LOCATION);
1660 gcc_assert (saved_line_table->reallocator);
1661 line_table->reallocator = saved_line_table->reallocator;
1662 gcc_assert (saved_line_table->round_alloc_size);
1663 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1664 line_table->default_range_bits = case_.m_default_range_bits;
1665 if (case_.m_base_location)
1667 line_table->highest_location = case_.m_base_location;
1668 line_table->highest_line = case_.m_base_location;
1672 /* Destructor. Restore the old value of line_table. */
1674 line_table_test::~line_table_test ()
1676 gcc_assert (saved_line_table != NULL);
1677 line_table = saved_line_table;
1678 saved_line_table = NULL;
1681 /* Verify basic operation of ordinary linemaps. */
1683 static void
1684 test_accessing_ordinary_linemaps (const line_table_case &case_)
1686 line_table_test ltt (case_);
1688 /* Build a simple linemap describing some locations. */
1689 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1691 linemap_line_start (line_table, 1, 100);
1692 location_t loc_a = linemap_position_for_column (line_table, 1);
1693 location_t loc_b = linemap_position_for_column (line_table, 23);
1695 linemap_line_start (line_table, 2, 100);
1696 location_t loc_c = linemap_position_for_column (line_table, 1);
1697 location_t loc_d = linemap_position_for_column (line_table, 17);
1699 /* Example of a very long line. */
1700 linemap_line_start (line_table, 3, 2000);
1701 location_t loc_e = linemap_position_for_column (line_table, 700);
1703 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1705 /* Multiple files. */
1706 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1707 linemap_line_start (line_table, 1, 200);
1708 location_t loc_f = linemap_position_for_column (line_table, 150);
1709 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1711 /* Verify that we can recover the location info. */
1712 assert_loceq ("foo.c", 1, 1, loc_a);
1713 assert_loceq ("foo.c", 1, 23, loc_b);
1714 assert_loceq ("foo.c", 2, 1, loc_c);
1715 assert_loceq ("foo.c", 2, 17, loc_d);
1716 assert_loceq ("foo.c", 3, 700, loc_e);
1717 assert_loceq ("bar.c", 1, 150, loc_f);
1719 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1720 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1722 /* Verify using make_location to build a range, and extracting data
1723 back from it. */
1724 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1725 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1726 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1727 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1728 ASSERT_EQ (loc_b, src_range.m_start);
1729 ASSERT_EQ (loc_d, src_range.m_finish);
1732 /* Verify various properties of UNKNOWN_LOCATION. */
1734 static void
1735 test_unknown_location ()
1737 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1738 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1739 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1742 /* Verify various properties of BUILTINS_LOCATION. */
1744 static void
1745 test_builtins ()
1747 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1748 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1751 /* Regression test for make_location.
1752 Ensure that we use pure locations for the start/finish of the range,
1753 rather than storing a packed or ad-hoc range as the start/finish. */
1755 static void
1756 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1758 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1759 with C++ frontend.
1760 ....................0000000001111111111222.
1761 ....................1234567890123456789012. */
1762 const char *content = " r += !aaa == bbb;\n";
1763 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1764 line_table_test ltt (case_);
1765 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1767 const location_t c11 = linemap_position_for_column (line_table, 11);
1768 const location_t c12 = linemap_position_for_column (line_table, 12);
1769 const location_t c13 = linemap_position_for_column (line_table, 13);
1770 const location_t c14 = linemap_position_for_column (line_table, 14);
1771 const location_t c21 = linemap_position_for_column (line_table, 21);
1773 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1774 return;
1776 /* Use column 13 for the caret location, arbitrarily, to verify that we
1777 handle start != caret. */
1778 const location_t aaa = make_location (c13, c12, c14);
1779 ASSERT_EQ (c13, get_pure_location (aaa));
1780 ASSERT_EQ (c12, get_start (aaa));
1781 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1782 ASSERT_EQ (c14, get_finish (aaa));
1783 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1785 /* Make a location using a location with a range as the start-point. */
1786 const location_t not_aaa = make_location (c11, aaa, c14);
1787 ASSERT_EQ (c11, get_pure_location (not_aaa));
1788 /* It should use the start location of the range, not store the range
1789 itself. */
1790 ASSERT_EQ (c12, get_start (not_aaa));
1791 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1792 ASSERT_EQ (c14, get_finish (not_aaa));
1793 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1795 /* Similarly, make a location with a range as the end-point. */
1796 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1797 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1798 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1799 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1800 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1801 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1802 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1803 /* It should use the finish location of the range, not store the range
1804 itself. */
1805 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1806 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1807 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1808 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1809 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1812 /* Verify reading of input files (e.g. for caret-based diagnostics). */
1814 static void
1815 test_reading_source_line ()
1817 /* Create a tempfile and write some text to it. */
1818 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1819 "01234567890123456789\n"
1820 "This is the test text\n"
1821 "This is the 3rd line\n");
1823 /* Read back a specific line from the tempfile. */
1824 int line_size;
1825 const char *source_line = location_get_source_line (tmp.get_filename (),
1826 2, &line_size);
1827 ASSERT_TRUE (source_line != NULL);
1828 ASSERT_EQ (21, line_size);
1829 if (!strncmp ("This is the test text",
1830 source_line, line_size))
1831 ::selftest::pass (SELFTEST_LOCATION,
1832 "source_line matched expected value");
1833 else
1834 ::selftest::fail (SELFTEST_LOCATION,
1835 "source_line did not match expected value");
/* Tests of lexing.  */

/* Assert that the text obtained from cpp_token_as_text for token TOK of
   PARSER is equal to the string EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)               \
  SELFTEST_BEGIN_STMT                                                     \
    unsigned char *tok_text_ = cpp_token_as_text ((PARSER), (TOK));       \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)tok_text_);              \
  SELFTEST_END_STMT
1850 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1851 and ranges from EXP_START_COL to EXP_FINISH_COL.
1852 Use LOC as the effective location of the selftest. */
1854 static void
1855 assert_token_loc_eq (const location &loc,
1856 const cpp_token *tok,
1857 const char *exp_filename, int exp_linenum,
1858 int exp_start_col, int exp_finish_col)
1860 location_t tok_loc = tok->src_loc;
1861 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1862 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1864 /* If location_t values are sufficiently high, then column numbers
1865 will be unavailable. */
1866 if (!should_have_column_data_p (tok_loc))
1867 return;
1869 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1870 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1871 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1872 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
/* Check TOK->src_loc with assert_token_loc_eq, reporting any failure
   against SELFTEST_LOCATION, i.e. the place where this macro is used.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,               \
                            EXP_START_COL, EXP_FINISH_COL)                \
  assert_token_loc_eq (SELFTEST_LOCATION,                                 \
                       (TOK),                                             \
                       (EXP_FILENAME), (EXP_LINENUM),                     \
                       (EXP_START_COL), (EXP_FINISH_COL))
1883 /* Test of lexing a file using libcpp, verifying tokens and their
1884 location information. */
1886 static void
1887 test_lexer (const line_table_case &case_)
1889 /* Create a tempfile and write some text to it. */
1890 const char *content =
1891 /*00000000011111111112222222222333333.3333444444444.455555555556
1892 12345678901234567890123456789012345.6789012345678.901234567890. */
1893 ("test_name /* c-style comment */\n"
1894 " \"test literal\"\n"
1895 " // test c++-style comment\n"
1896 " 42\n");
1897 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1899 line_table_test ltt (case_);
1901 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1903 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1904 ASSERT_NE (fname, NULL);
1906 /* Verify that we get the expected tokens back, with the correct
1907 location information. */
1909 location_t loc;
1910 const cpp_token *tok;
1911 tok = cpp_get_token_with_location (parser, &loc);
1912 ASSERT_NE (tok, NULL);
1913 ASSERT_EQ (tok->type, CPP_NAME);
1914 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
1915 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
1917 tok = cpp_get_token_with_location (parser, &loc);
1918 ASSERT_NE (tok, NULL);
1919 ASSERT_EQ (tok->type, CPP_STRING);
1920 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
1921 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
1923 tok = cpp_get_token_with_location (parser, &loc);
1924 ASSERT_NE (tok, NULL);
1925 ASSERT_EQ (tok->type, CPP_NUMBER);
1926 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
1927 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
1929 tok = cpp_get_token_with_location (parser, &loc);
1930 ASSERT_NE (tok, NULL);
1931 ASSERT_EQ (tok->type, CPP_EOF);
1933 cpp_finish (parser, NULL);
1934 cpp_destroy (parser);
/* Forward declarations, needed because lexer_test and lexer_test_options
   refer to each other.  */

struct lexer_test;
class lexer_test_options;

/* Interface for customizing a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor, with the
   lexer_test that is being constructed.  */

class lexer_test_options
{
 public:
  virtual void apply (lexer_test &) = 0;
};
1951 /* A struct for writing lexer tests. */
1953 struct lexer_test
1955 lexer_test (const line_table_case &case_, const char *content,
1956 lexer_test_options *options);
1957 ~lexer_test ();
1959 const cpp_token *get_token ();
1961 temp_source_file m_tempfile;
1962 line_table_test m_ltt;
1963 cpp_reader *m_parser;
1964 string_concat_db m_concats;
1967 /* Use an EBCDIC encoding for the execution charset, specifically
1968 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
1970 This exercises iconv integration within libcpp.
1971 Not every build of iconv supports the given charset,
1972 so we need to flag this error and handle it gracefully. */
1974 class ebcdic_execution_charset : public lexer_test_options
1976 public:
1977 ebcdic_execution_charset () : m_num_iconv_errors (0)
1979 gcc_assert (s_singleton == NULL);
1980 s_singleton = this;
1982 ~ebcdic_execution_charset ()
1984 gcc_assert (s_singleton == this);
1985 s_singleton = NULL;
1988 void apply (lexer_test &test) FINAL OVERRIDE
1990 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
1991 cpp_opts->narrow_charset = "IBM1047";
1993 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
1994 callbacks->error = on_error;
1997 static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
1998 int level ATTRIBUTE_UNUSED,
1999 int reason ATTRIBUTE_UNUSED,
2000 rich_location *richloc ATTRIBUTE_UNUSED,
2001 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2002 ATTRIBUTE_FPTR_PRINTF(5,0)
2004 gcc_assert (s_singleton);
2005 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2006 when the local iconv build doesn't support the conversion. */
2007 if (strstr (msgid, "not supported by iconv"))
2009 s_singleton->m_num_iconv_errors++;
2010 return true;
2013 /* Otherwise, we have an unexpected error. */
2014 abort ();
2017 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2019 private:
2020 static ebcdic_execution_charset *s_singleton;
2021 int m_num_iconv_errors;
2024 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2026 /* Constructor. Override line_table with a new instance based on CASE_,
2027 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2028 start parsing the tempfile. */
2030 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2031 lexer_test_options *options) :
2032 /* Create a tempfile and write the text to it. */
2033 m_tempfile (SELFTEST_LOCATION, ".c", content),
2034 m_ltt (case_),
2035 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2036 m_concats ()
2038 if (options)
2039 options->apply (*this);
2041 cpp_init_iconv (m_parser);
2043 /* Parse the file. */
2044 const char *fname = cpp_read_main_file (m_parser,
2045 m_tempfile.get_filename ());
2046 ASSERT_NE (fname, NULL);
2049 /* Destructor. Verify that the next token in m_parser is EOF. */
2051 lexer_test::~lexer_test ()
2053 location_t loc;
2054 const cpp_token *tok;
2056 tok = cpp_get_token_with_location (m_parser, &loc);
2057 ASSERT_NE (tok, NULL);
2058 ASSERT_EQ (tok->type, CPP_EOF);
2060 cpp_finish (m_parser, NULL);
2061 cpp_destroy (m_parser);
2064 /* Get the next token from m_parser. */
2066 const cpp_token *
2067 lexer_test::get_token ()
2069 location_t loc;
2070 const cpp_token *tok;
2072 tok = cpp_get_token_with_location (m_parser, &loc);
2073 ASSERT_NE (tok, NULL);
2074 return tok;
2077 /* Verify that locations within string literals are correctly handled. */
2079 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2080 using the string concatenation database for TEST.
2082 Assert that the character at index IDX is on EXPECTED_LINE,
2083 and that it begins at column EXPECTED_START_COL and ends at
2084 EXPECTED_FINISH_COL (unless the locations are beyond
2085 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2086 columns). */
2088 static void
2089 assert_char_at_range (const location &loc,
2090 lexer_test& test,
2091 location_t strloc, enum cpp_ttype type, int idx,
2092 int expected_line, int expected_start_col,
2093 int expected_finish_col)
2095 cpp_reader *pfile = test.m_parser;
2096 string_concat_db *concats = &test.m_concats;
2098 source_range actual_range;
2099 const char *err
2100 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2101 &actual_range);
2102 if (should_have_column_data_p (strloc))
2103 ASSERT_EQ_AT (loc, NULL, err);
2104 else
2106 ASSERT_STREQ_AT (loc,
2107 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2108 err);
2109 return;
2112 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2113 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2114 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2115 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2117 if (should_have_column_data_p (actual_range.m_start))
2119 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2120 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2122 if (should_have_column_data_p (actual_range.m_finish))
2124 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2125 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
/* Convenience wrapper around assert_char_at_range that passes
   SELFTEST_LOCATION, so that any failure is reported at the point of
   use of the macro.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX,		\
			     EXPECTED_LINE, EXPECTED_START_COL,	\
			     EXPECTED_FINISH_COL)			\
  assert_char_at_range (SELFTEST_LOCATION,				\
			(LEXER_TEST), (STRLOC), (TYPE), (IDX),		\
			(EXPECTED_LINE), (EXPECTED_START_COL),		\
			(EXPECTED_FINISH_COL))
2138 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2139 using the string concatenation database for TEST.
2141 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2143 static void
2144 assert_num_substring_ranges (const location &loc,
2145 lexer_test& test,
2146 location_t strloc,
2147 enum cpp_ttype type,
2148 int expected_num_ranges)
2150 cpp_reader *pfile = test.m_parser;
2151 string_concat_db *concats = &test.m_concats;
2153 int actual_num_ranges = -1;
2154 const char *err
2155 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2156 &actual_num_ranges);
2157 if (should_have_column_data_p (strloc))
2158 ASSERT_EQ_AT (loc, NULL, err);
2159 else
2161 ASSERT_STREQ_AT (loc,
2162 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2163 err);
2164 return;
2166 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
/* Convenience wrapper around assert_num_substring_ranges that passes
   SELFTEST_LOCATION, so that any failure is reported at the point of
   use of the macro.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,		\
				    EXPECTED_NUM_RANGES)		\
  assert_num_substring_ranges (SELFTEST_LOCATION,			\
			       (LEXER_TEST), (STRLOC), (TYPE),		\
			       (EXPECTED_NUM_RANGES))
2178 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2179 returns an error (using the string concatenation database for TEST). */
2181 static void
2182 assert_has_no_substring_ranges (const location &loc,
2183 lexer_test& test,
2184 location_t strloc,
2185 enum cpp_ttype type,
2186 const char *expected_err)
2188 cpp_reader *pfile = test.m_parser;
2189 string_concat_db *concats = &test.m_concats;
2190 cpp_substring_ranges ranges;
2191 const char *actual_err
2192 = get_substring_ranges_for_loc (pfile, concats, strloc,
2193 type, ranges);
2194 if (should_have_column_data_p (strloc))
2195 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2196 else
2197 ASSERT_STREQ_AT (loc,
2198 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2199 actual_err);
/* Convenience wrapper around assert_has_no_substring_ranges that passes
   SELFTEST_LOCATION, so that any failure is reported at the point of
   use of the macro.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)	\
  assert_has_no_substring_ranges (SELFTEST_LOCATION,			\
				  (LEXER_TEST), (STRLOC), (TYPE),	\
				  (ERR))
2206 /* Lex a simple string literal. Verify the substring location data, before
2207 and after running cpp_interpret_string on it. */
2209 static void
2210 test_lexer_string_locations_simple (const line_table_case &case_)
2212 /* Digits 0-9 (with 0 at column 10), the simple way.
2213 ....................000000000.11111111112.2222222223333333333
2214 ....................123456789.01234567890.1234567890123456789
2215 We add a trailing comment to ensure that we correctly locate
2216 the end of the string literal token. */
2217 const char *content = " \"0123456789\" /* not a string */\n";
2218 lexer_test test (case_, content, NULL);
2220 /* Verify that we get the expected token back, with the correct
2221 location information. */
2222 const cpp_token *tok = test.get_token ();
2223 ASSERT_EQ (tok->type, CPP_STRING);
2224 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2225 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2227 /* At this point in lexing, the quote characters are treated as part of
2228 the string (they are stripped off by cpp_interpret_string). */
2230 ASSERT_EQ (tok->val.str.len, 12);
2232 /* Verify that cpp_interpret_string works. */
2233 cpp_string dst_string;
2234 const enum cpp_ttype type = CPP_STRING;
2235 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2236 &dst_string, type);
2237 ASSERT_TRUE (result);
2238 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2239 free (const_cast <unsigned char *> (dst_string.text));
2241 /* Verify ranges of individual characters. This no longer includes the
2242 quotes. */
2243 for (int i = 0; i <= 9; i++)
2244 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2245 10 + i, 10 + i);
2247 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 10);
2250 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2251 encoding. */
2253 static void
2254 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2256 /* EBCDIC support requires iconv. */
2257 if (!HAVE_ICONV)
2258 return;
2260 /* Digits 0-9 (with 0 at column 10), the simple way.
2261 ....................000000000.11111111112.2222222223333333333
2262 ....................123456789.01234567890.1234567890123456789
2263 We add a trailing comment to ensure that we correctly locate
2264 the end of the string literal token. */
2265 const char *content = " \"0123456789\" /* not a string */\n";
2266 ebcdic_execution_charset use_ebcdic;
2267 lexer_test test (case_, content, &use_ebcdic);
2269 /* Verify that we get the expected token back, with the correct
2270 location information. */
2271 const cpp_token *tok = test.get_token ();
2272 ASSERT_EQ (tok->type, CPP_STRING);
2273 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2274 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2276 /* At this point in lexing, the quote characters are treated as part of
2277 the string (they are stripped off by cpp_interpret_string). */
2279 ASSERT_EQ (tok->val.str.len, 12);
2281 /* The remainder of the test requires an iconv implementation that
2282 can convert from UTF-8 to the EBCDIC encoding requested above. */
2283 if (use_ebcdic.iconv_errors_occurred_p ())
2284 return;
2286 /* Verify that cpp_interpret_string works. */
2287 cpp_string dst_string;
2288 const enum cpp_ttype type = CPP_STRING;
2289 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2290 &dst_string, type);
2291 ASSERT_TRUE (result);
2292 /* We should now have EBCDIC-encoded text, specifically
2293 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2294 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2295 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2296 (const char *)dst_string.text);
2297 free (const_cast <unsigned char *> (dst_string.text));
2299 /* Verify that we don't attempt to record substring location information
2300 for such cases. */
2301 ASSERT_HAS_NO_SUBSTRING_RANGES
2302 (test, tok->src_loc, type,
2303 "execution character set != source character set");
2306 /* Lex a string literal containing a hex-escaped character.
2307 Verify the substring location data, before and after running
2308 cpp_interpret_string on it. */
2310 static void
2311 test_lexer_string_locations_hex (const line_table_case &case_)
2313 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2314 and with a space in place of digit 6, to terminate the escaped
2315 hex code.
2316 ....................000000000.111111.11112222.
2317 ....................123456789.012345.67890123. */
2318 const char *content = " \"01234\\x35 789\"\n";
2319 lexer_test test (case_, content, NULL);
2321 /* Verify that we get the expected token back, with the correct
2322 location information. */
2323 const cpp_token *tok = test.get_token ();
2324 ASSERT_EQ (tok->type, CPP_STRING);
2325 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2326 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2328 /* At this point in lexing, the quote characters are treated as part of
2329 the string (they are stripped off by cpp_interpret_string). */
2330 ASSERT_EQ (tok->val.str.len, 15);
2332 /* Verify that cpp_interpret_string works. */
2333 cpp_string dst_string;
2334 const enum cpp_ttype type = CPP_STRING;
2335 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2336 &dst_string, type);
2337 ASSERT_TRUE (result);
2338 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2339 free (const_cast <unsigned char *> (dst_string.text));
2341 /* Verify ranges of individual characters. This no longer includes the
2342 quotes. */
2343 for (int i = 0; i <= 4; i++)
2344 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2345 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2346 for (int i = 6; i <= 9; i++)
2347 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2349 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 10);
2352 /* Lex a string literal containing an octal-escaped character.
2353 Verify the substring location data after running cpp_interpret_string
2354 on it. */
2356 static void
2357 test_lexer_string_locations_oct (const line_table_case &case_)
2359 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2360 and with a space in place of digit 6, to terminate the escaped
2361 octal code.
2362 ....................000000000.111111.11112222.2222223333333333444
2363 ....................123456789.012345.67890123.4567890123456789012 */
2364 const char *content = " \"01234\\065 789\" /* not a string */\n";
2365 lexer_test test (case_, content, NULL);
2367 /* Verify that we get the expected token back, with the correct
2368 location information. */
2369 const cpp_token *tok = test.get_token ();
2370 ASSERT_EQ (tok->type, CPP_STRING);
2371 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2373 /* Verify that cpp_interpret_string works. */
2374 cpp_string dst_string;
2375 const enum cpp_ttype type = CPP_STRING;
2376 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2377 &dst_string, type);
2378 ASSERT_TRUE (result);
2379 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2380 free (const_cast <unsigned char *> (dst_string.text));
2382 /* Verify ranges of individual characters. This no longer includes the
2383 quotes. */
2384 for (int i = 0; i < 5; i++)
2385 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2386 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2387 for (int i = 6; i <= 9; i++)
2388 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2390 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 10);
2393 /* Test of string literal containing letter escapes. */
2395 static void
2396 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2398 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2399 .....................000000000.1.11111.1.1.11222.22222223333333
2400 .....................123456789.0.12345.6.7.89012.34567890123456. */
2401 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2402 lexer_test test (case_, content, NULL);
2404 /* Verify that we get the expected tokens back. */
2405 const cpp_token *tok = test.get_token ();
2406 ASSERT_EQ (tok->type, CPP_STRING);
2407 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2409 /* Verify ranges of individual characters. */
2410 /* "\t". */
2411 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2412 0, 1, 10, 11);
2413 /* "foo". */
2414 for (int i = 1; i <= 3; i++)
2415 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2416 i, 1, 11 + i, 11 + i);
2417 /* "\\" and "\n". */
2418 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2419 4, 1, 15, 16);
2420 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2421 5, 1, 17, 18);
2423 /* "bar". */
2424 for (int i = 6; i <= 8; i++)
2425 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2426 i, 1, 13 + i, 13 + i);
2428 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 9);
2431 /* Another test of a string literal containing a letter escape.
2432 Based on string seen in
2433 printf ("%-%\n");
2434 in gcc.dg/format/c90-printf-1.c. */
2436 static void
2437 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2439 /* .....................000000000.1111.11.1111.22222222223.
2440 .....................123456789.0123.45.6789.01234567890. */
2441 const char *content = (" \"%-%\\n\" /* non-str */\n");
2442 lexer_test test (case_, content, NULL);
2444 /* Verify that we get the expected tokens back. */
2445 const cpp_token *tok = test.get_token ();
2446 ASSERT_EQ (tok->type, CPP_STRING);
2447 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2449 /* Verify ranges of individual characters. */
2450 /* "%-%". */
2451 for (int i = 0; i < 3; i++)
2452 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2453 i, 1, 10 + i, 10 + i);
2454 /* "\n". */
2455 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2456 3, 1, 13, 14);
2458 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 4);
2461 /* Lex a string literal containing UCN 4 characters.
2462 Verify the substring location data after running cpp_interpret_string
2463 on it. */
2465 static void
2466 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2468 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2469 as UCN 4.
2470 ....................000000000.111111.111122.222222223.33333333344444
2471 ....................123456789.012345.678901.234567890.12345678901234 */
2472 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2473 lexer_test test (case_, content, NULL);
2475 /* Verify that we get the expected token back, with the correct
2476 location information. */
2477 const cpp_token *tok = test.get_token ();
2478 ASSERT_EQ (tok->type, CPP_STRING);
2479 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2481 /* Verify that cpp_interpret_string works.
2482 The string should be encoded in the execution character
2483 set. Assuming that that is UTF-8, we should have the following:
2484 ----------- ---- ----- ------- ----------------
2485 Byte offset Byte Octal Unicode Source Column(s)
2486 ----------- ---- ----- ------- ----------------
2487 0 0x30 '0' 10
2488 1 0x31 '1' 11
2489 2 0x32 '2' 12
2490 3 0x33 '3' 13
2491 4 0x34 '4' 14
2492 5 0xE2 \342 U+2174 15-20
2493 6 0x85 \205 (cont) 15-20
2494 7 0xB4 \264 (cont) 15-20
2495 8 0xE2 \342 U+2175 21-26
2496 9 0x85 \205 (cont) 21-26
2497 10 0xB5 \265 (cont) 21-26
2498 11 0x37 '7' 27
2499 12 0x38 '8' 28
2500 13 0x39 '9' 29
2501 ----------- ---- ----- ------- ---------------. */
2503 cpp_string dst_string;
2504 const enum cpp_ttype type = CPP_STRING;
2505 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2506 &dst_string, type);
2507 ASSERT_TRUE (result);
2508 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2509 (const char *)dst_string.text);
2510 free (const_cast <unsigned char *> (dst_string.text));
2512 /* Verify ranges of individual characters. This no longer includes the
2513 quotes.
2514 '01234'. */
2515 for (int i = 0; i <= 4; i++)
2516 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2517 /* U+2174. */
2518 for (int i = 5; i <= 7; i++)
2519 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2520 /* U+2175. */
2521 for (int i = 8; i <= 10; i++)
2522 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2523 /* '789'. */
2524 for (int i = 11; i <= 13; i++)
2525 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2527 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 14);
2530 /* Lex a string literal containing UCN 8 characters.
2531 Verify the substring location data after running cpp_interpret_string
2532 on it. */
2534 static void
2535 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2537 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2538 ....................000000000.111111.1111222222.2222333333333.344444
2539 ....................123456789.012345.6789012345.6789012345678.901234 */
2540 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2541 lexer_test test (case_, content, NULL);
2543 /* Verify that we get the expected token back, with the correct
2544 location information. */
2545 const cpp_token *tok = test.get_token ();
2546 ASSERT_EQ (tok->type, CPP_STRING);
2547 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2548 "\"01234\\U00002174\\U00002175789\"");
2550 /* Verify that cpp_interpret_string works.
2551 The UTF-8 encoding of the string is identical to that from
2552 the ucn4 testcase above; the only difference is the column
2553 locations. */
2554 cpp_string dst_string;
2555 const enum cpp_ttype type = CPP_STRING;
2556 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2557 &dst_string, type);
2558 ASSERT_TRUE (result);
2559 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2560 (const char *)dst_string.text);
2561 free (const_cast <unsigned char *> (dst_string.text));
2563 /* Verify ranges of individual characters. This no longer includes the
2564 quotes.
2565 '01234'. */
2566 for (int i = 0; i <= 4; i++)
2567 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2568 /* U+2174. */
2569 for (int i = 5; i <= 7; i++)
2570 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2571 /* U+2175. */
2572 for (int i = 8; i <= 10; i++)
2573 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2574 /* '789' at columns 35-37 */
2575 for (int i = 11; i <= 13; i++)
2576 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2578 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 14);
/* Read the four bytes at PTR_BE_VALUE as a big-endian (most significant
   byte first) 32-bit value, and return it in host byte order.  The
   bytes are read individually, so the result does not depend on the
   endianness of the host.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t value = 0;

  /* Accumulate from the most significant byte down.  */
  for (int idx = 0; idx < 4; idx++)
    value = (value << 8) | (uint32_t) bytes[idx];

  return value;
}
2593 /* Lex a wide string literal and verify that attempts to read substring
2594 location data from it fail gracefully. */
2596 static void
2597 test_lexer_string_locations_wide_string (const line_table_case &case_)
2599 /* Digits 0-9.
2600 ....................000000000.11111111112.22222222233333
2601 ....................123456789.01234567890.12345678901234 */
2602 const char *content = " L\"0123456789\" /* non-str */\n";
2603 lexer_test test (case_, content, NULL);
2605 /* Verify that we get the expected token back, with the correct
2606 location information. */
2607 const cpp_token *tok = test.get_token ();
2608 ASSERT_EQ (tok->type, CPP_WSTRING);
2609 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2611 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2612 cpp_string dst_string;
2613 const enum cpp_ttype type = CPP_WSTRING;
2614 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2615 &dst_string, type);
2616 ASSERT_TRUE (result);
2617 /* The cpp_reader defaults to big-endian with
2618 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2619 now be encoded as UTF-32BE. */
2620 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2621 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2622 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2623 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2624 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2625 free (const_cast <unsigned char *> (dst_string.text));
2627 /* We don't yet support generating substring location information
2628 for L"" strings. */
2629 ASSERT_HAS_NO_SUBSTRING_RANGES
2630 (test, tok->src_loc, type,
2631 "execution character set != source character set");
/* Read the two bytes at PTR_BE_VALUE as a big-endian (most significant
   byte first) 16-bit value, and return it in host byte order.  The
   bytes are read individually, so the result does not depend on the
   endianness of the host.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high = bytes[0];
  const unsigned int low = bytes[1];

  return (uint16_t) ((high << 8) | low);
}
2643 /* Lex a u"" string literal and verify that attempts to read substring
2644 location data from it fail gracefully. */
2646 static void
2647 test_lexer_string_locations_string16 (const line_table_case &case_)
2649 /* Digits 0-9.
2650 ....................000000000.11111111112.22222222233333
2651 ....................123456789.01234567890.12345678901234 */
2652 const char *content = " u\"0123456789\" /* non-str */\n";
2653 lexer_test test (case_, content, NULL);
2655 /* Verify that we get the expected token back, with the correct
2656 location information. */
2657 const cpp_token *tok = test.get_token ();
2658 ASSERT_EQ (tok->type, CPP_STRING16);
2659 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2661 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2662 cpp_string dst_string;
2663 const enum cpp_ttype type = CPP_STRING16;
2664 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2665 &dst_string, type);
2666 ASSERT_TRUE (result);
2668 /* The cpp_reader defaults to big-endian, so dst_string should
2669 now be encoded as UTF-16BE. */
2670 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2671 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2672 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2673 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2674 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2675 free (const_cast <unsigned char *> (dst_string.text));
2677 /* We don't yet support generating substring location information
2678 for L"" strings. */
2679 ASSERT_HAS_NO_SUBSTRING_RANGES
2680 (test, tok->src_loc, type,
2681 "execution character set != source character set");
2684 /* Lex a U"" string literal and verify that attempts to read substring
2685 location data from it fail gracefully. */
2687 static void
2688 test_lexer_string_locations_string32 (const line_table_case &case_)
2690 /* Digits 0-9.
2691 ....................000000000.11111111112.22222222233333
2692 ....................123456789.01234567890.12345678901234 */
2693 const char *content = " U\"0123456789\" /* non-str */\n";
2694 lexer_test test (case_, content, NULL);
2696 /* Verify that we get the expected token back, with the correct
2697 location information. */
2698 const cpp_token *tok = test.get_token ();
2699 ASSERT_EQ (tok->type, CPP_STRING32);
2700 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2702 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2703 cpp_string dst_string;
2704 const enum cpp_ttype type = CPP_STRING32;
2705 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2706 &dst_string, type);
2707 ASSERT_TRUE (result);
2709 /* The cpp_reader defaults to big-endian, so dst_string should
2710 now be encoded as UTF-32BE. */
2711 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2712 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2713 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2714 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2715 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2716 free (const_cast <unsigned char *> (dst_string.text));
2718 /* We don't yet support generating substring location information
2719 for L"" strings. */
2720 ASSERT_HAS_NO_SUBSTRING_RANGES
2721 (test, tok->src_loc, type,
2722 "execution character set != source character set");
2725 /* Lex a u8-string literal.
2726 Verify the substring location data after running cpp_interpret_string
2727 on it. */
2729 static void
2730 test_lexer_string_locations_u8 (const line_table_case &case_)
2732 /* Digits 0-9.
2733 ....................000000000.11111111112.22222222233333
2734 ....................123456789.01234567890.12345678901234 */
2735 const char *content = " u8\"0123456789\" /* non-str */\n";
2736 lexer_test test (case_, content, NULL);
2738 /* Verify that we get the expected token back, with the correct
2739 location information. */
2740 const cpp_token *tok = test.get_token ();
2741 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2742 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2744 /* Verify that cpp_interpret_string works. */
2745 cpp_string dst_string;
2746 const enum cpp_ttype type = CPP_STRING;
2747 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2748 &dst_string, type);
2749 ASSERT_TRUE (result);
2750 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2751 free (const_cast <unsigned char *> (dst_string.text));
2753 /* Verify ranges of individual characters. This no longer includes the
2754 quotes. */
2755 for (int i = 0; i <= 9; i++)
2756 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2759 /* Lex a string literal containing UTF-8 source characters.
2760 Verify the substring location data after running cpp_interpret_string
2761 on it. */
2763 static void
2764 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2766 /* This string literal is written out to the source file as UTF-8,
2767 and is of the form "before mojibake after", where "mojibake"
2768 is written as the following four unicode code points:
2769 U+6587 CJK UNIFIED IDEOGRAPH-6587
2770 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2771 U+5316 CJK UNIFIED IDEOGRAPH-5316
2772 U+3051 HIRAGANA LETTER KE.
2773 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2774 "before" and "after" are 1 byte per unicode character.
2776 The numbering shown are "columns", which are *byte* numbers within
2777 the line, rather than unicode character numbers.
2779 .................... 000000000.1111111.
2780 .................... 123456789.0123456. */
2781 const char *content = (" \"before "
2782 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2783 UTF-8: 0xE6 0x96 0x87
2784 C octal escaped UTF-8: \346\226\207
2785 "column" numbers: 17-19. */
2786 "\346\226\207"
2788 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2789 UTF-8: 0xE5 0xAD 0x97
2790 C octal escaped UTF-8: \345\255\227
2791 "column" numbers: 20-22. */
2792 "\345\255\227"
2794 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2795 UTF-8: 0xE5 0x8C 0x96
2796 C octal escaped UTF-8: \345\214\226
2797 "column" numbers: 23-25. */
2798 "\345\214\226"
2800 /* U+3051 HIRAGANA LETTER KE
2801 UTF-8: 0xE3 0x81 0x91
2802 C octal escaped UTF-8: \343\201\221
2803 "column" numbers: 26-28. */
2804 "\343\201\221"
2806 /* column numbers 29 onwards
2807 2333333.33334444444444
2808 9012345.67890123456789. */
2809 " after\" /* non-str */\n");
2810 lexer_test test (case_, content, NULL);
2812 /* Verify that we get the expected token back, with the correct
2813 location information. */
2814 const cpp_token *tok = test.get_token ();
2815 ASSERT_EQ (tok->type, CPP_STRING);
2816 ASSERT_TOKEN_AS_TEXT_EQ
2817 (test.m_parser, tok,
2818 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2820 /* Verify that cpp_interpret_string works. */
2821 cpp_string dst_string;
2822 const enum cpp_ttype type = CPP_STRING;
2823 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2824 &dst_string, type);
2825 ASSERT_TRUE (result);
2826 ASSERT_STREQ
2827 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2828 (const char *)dst_string.text);
2829 free (const_cast <unsigned char *> (dst_string.text));
2831 /* Verify ranges of individual characters. This no longer includes the
2832 quotes.
2833 Assuming that both source and execution encodings are UTF-8, we have
2834 a run of 25 octets in each. */
2835 for (int i = 0; i < 25; i++)
2836 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2838 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 25);
2841 /* Test of string literal concatenation: two adjacent string literals on
   consecutive lines are lexed, joined via cpp_interpret_string, recorded as
   a concatenation, and then each character of the result is looked up
   relative to the location of the first literal. */
2843 static void
2844 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
2846 /* Digits 0-9.
2847 .....................000000000.111111.11112222222222
2848 .....................123456789.012345.67890123456789. */
/* NOTE(review): the checks below expect the digits at columns 10-14, i.e.
   the opening quote at column 9; confirm that the leading whitespace in
   CONTENT agrees with the column ruler above. */
2849 const char *content = (" \"01234\" /* non-str */\n"
2850 " \"56789\" /* non-str */\n");
2851 lexer_test test (case_, content, NULL);
2853 location_t input_locs[2];
2855 /* Verify that we get the expected tokens back, saving each token's
   string and source location for the concatenation checks below. */
2856 auto_vec <cpp_string> input_strings;
2857 const cpp_token *tok_a = test.get_token ();
2858 ASSERT_EQ (tok_a->type, CPP_STRING);
2859 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
2860 input_strings.safe_push (tok_a->val.str);
2861 input_locs[0] = tok_a->src_loc;
2863 const cpp_token *tok_b = test.get_token ();
2864 ASSERT_EQ (tok_b->type, CPP_STRING);
2865 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
2866 input_strings.safe_push (tok_b->val.str);
2867 input_locs[1] = tok_b->src_loc;
2869 /* Verify that cpp_interpret_string works when given both strings. */
2870 cpp_string dst_string;
2871 const enum cpp_ttype type = CPP_STRING;
2872 bool result = cpp_interpret_string (test.m_parser,
2873 input_strings.address (), 2,
2874 &dst_string, type);
2875 ASSERT_TRUE (result);
2876 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2877 free (const_cast <unsigned char *> (dst_string.text));
2879 /* Simulate c-lex.c's lex_string in order to record concatenation. */
2880 test.m_concats.record_string_concatenation (2, input_locs);
2882 location_t initial_loc = input_locs[0];
/* Characters 0-4 belong to the first literal (line 1) and characters 5-9
   to the second (line 2); in both cases the digits are expected at
   columns 10-14.  The arguments after TYPE are presumably (character
   index, line, start column, end column) -- confirm against the
   definition of ASSERT_CHAR_AT_RANGE. */
2884 for (int i = 0; i <= 4; i++)
2885 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
2886 for (int i = 5; i <= 9; i++)
2887 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
2889 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 10);
2892 /* Another test of string literal concatenation: five two-character
   literals, one per line, are joined into "0123456789".  This also covers
   the situation in which the first literal has column data but the last
   one does not. */
2894 static void
2895 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
2897 /* Digits 0-9.
2898 .....................000000000.111.11111112222222
2899 .....................123456789.012.34567890123456. */
/* NOTE(review): the checks below expect each pair of digits at columns
   10-11; confirm that the leading whitespace in CONTENT agrees with the
   column ruler above. */
2900 const char *content = (" \"01\" /* non-str */\n"
2901 " \"23\" /* non-str */\n"
2902 " \"45\" /* non-str */\n"
2903 " \"67\" /* non-str */\n"
2904 " \"89\" /* non-str */\n");
2905 lexer_test test (case_, content, NULL);
2907 auto_vec <cpp_string> input_strings;
2908 location_t input_locs[5];
2910 /* Verify that we get the expected tokens back, saving each token's
   string and source location. */
2911 for (int i = 0; i < 5; i++)
2913 const cpp_token *tok = test.get_token ();
2914 ASSERT_EQ (tok->type, CPP_STRING);
2915 input_strings.safe_push (tok->val.str);
2916 input_locs[i] = tok->src_loc;
2919 /* Verify that cpp_interpret_string works when given all five strings. */
2920 cpp_string dst_string;
2921 const enum cpp_ttype type = CPP_STRING;
2922 bool result = cpp_interpret_string (test.m_parser,
2923 input_strings.address (), 5,
2924 &dst_string, type);
2925 ASSERT_TRUE (result);
2926 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2927 free (const_cast <unsigned char *> (dst_string.text));
2929 /* Simulate c-lex.c's lex_string in order to record concatenation. */
2930 test.m_concats.record_string_concatenation (5, input_locs);
2932 location_t initial_loc = input_locs[0];
2934 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
2935 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
2936 and expect get_source_range_for_substring to fail.
2937 However, for a string concatenation test, we can have a case
2938 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
2939 but subsequent strings can be after it.
2940 Attempting to detect this within assert_char_at_range
2941 would overcomplicate the logic for the common test cases, so
2942 we detect it here. */
2943 if (should_have_column_data_p (input_locs[0])
2944 && !should_have_column_data_p (input_locs[4]))
2946 /* Verify that get_source_range_for_substring gracefully rejects
2947 this case. */
2948 source_range actual_range;
2949 const char *err
2950 = get_source_range_for_char (test.m_parser, &test.m_concats,
2951 initial_loc, type, 0, &actual_range);
2952 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
/* Nothing further can be checked once the lookup has been rejected. */
2953 return;
/* String I contributes characters 2*I and 2*I+1 of the result; both are
   expected on line I+1, at columns 10 and 11 respectively. */
2956 for (int i = 0; i < 5; i++)
2957 for (int j = 0; j < 2; j++)
2958 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
2959 i + 1, 10 + j, 10 + j);
2961 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 10);
2964 /* Another test of string literal concatenation, this time combined with
2965 various kinds of escaped characters.  Four literals on a single line are
   joined into "0123456789"; two of them consist of a single hex or octal
   escape, whose range covers the whole escape sequence. */
2967 static void
2968 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
2970 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
2971 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
2972 const char *content
2973 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
2974 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
/* NOTE(review): the checks below expect the first digit at column 10;
   confirm that the whitespace in CONTENT agrees with the column ruler
   above. */
2975 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
2976 lexer_test test (case_, content, NULL);
2978 auto_vec <cpp_string> input_strings;
2979 location_t input_locs[4];
2981 /* Verify that we get the expected tokens back, saving each token's
   string and source location. */
2982 for (int i = 0; i < 4; i++)
2984 const cpp_token *tok = test.get_token ();
2985 ASSERT_EQ (tok->type, CPP_STRING);
2986 input_strings.safe_push (tok->val.str);
2987 input_locs[i] = tok->src_loc;
2990 /* Verify that cpp_interpret_string works when given all four strings. */
2991 cpp_string dst_string;
2992 const enum cpp_ttype type = CPP_STRING;
2993 bool result = cpp_interpret_string (test.m_parser,
2994 input_strings.address (), 4,
2995 &dst_string, type);
2996 ASSERT_TRUE (result);
2997 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2998 free (const_cast <unsigned char *> (dst_string.text));
3000 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3001 test.m_concats.record_string_concatenation (4, input_locs);
3003 location_t initial_loc = input_locs[0];
/* All four literals are on line 1.  Characters 0-4 are the plain digits of
   the first literal. */
3005 for (int i = 0; i <= 4; i++)
3006 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
/* Character 5 is the hex escape and character 6 the octal escape; each is
   expected to span four columns. */
3007 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3008 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
/* Characters 7-9 are the plain digits of the last literal, expected at
   columns 35-37. */
3009 for (int i = 7; i <= 9; i++)
3010 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3012 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 10);
3015 /* Test of string literal in a macro: the string token obtained by
   expanding MACRO should report per-character locations on the line of
   the #define (line 1), not on the line where MACRO is used. */
3017 static void
3018 test_lexer_string_locations_macro (const line_table_case &case_)
3020 /* Digits 0-9.
3021 .....................0000000001111111111.22222222223.
3022 .....................1234567890123456789.01234567890. */
/* NOTE(review): the checks below expect the digits at columns 20-29 of
   the #define line; confirm that the whitespace in CONTENT agrees with
   the column ruler above. */
3023 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3024 " MACRO");
3025 lexer_test test (case_, content, NULL);
3027 /* Verify that we get the expected tokens back: a padding token, then
   the string from the macro expansion. */
3028 const cpp_token *tok = test.get_token ();
3029 ASSERT_EQ (tok->type, CPP_PADDING);
3031 tok = test.get_token ();
3032 ASSERT_EQ (tok->type, CPP_STRING);
3033 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3035 /* Verify ranges of individual characters. We ought to
3036 see columns within the macro definition. */
3037 for (int i = 0; i <= 9; i++)
3038 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3039 i, 1, 20 + i, 20 + i);
3041 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
/* The expansion is followed by another padding token. */
3043 tok = test.get_token ();
3044 ASSERT_EQ (tok->type, CPP_PADDING);
3047 /* Test of stringification of a macro argument: MACRO(foo) yields the
   string token "foo", for which no substring ranges are available; the
   lookup is expected to fail with an error message rather than crash. */
3049 static void
3050 test_lexer_string_locations_stringified_macro_argument
3051 (const line_table_case &case_)
3053 /* .....................000000000111111111122222222223.
3054 .....................123456789012345678901234567890. */
3055 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3056 "MACRO(foo)\n");
3057 lexer_test test (case_, content, NULL);
3059 /* Verify that we get the expected token back: a padding token, then
   the stringified argument. */
3060 const cpp_token *tok = test.get_token ();
3061 ASSERT_EQ (tok->type, CPP_PADDING);
3063 tok = test.get_token ();
3064 ASSERT_EQ (tok->type, CPP_STRING);
3065 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3067 /* We don't support getting the location of a stringified macro
3068 argument. Verify that it fails gracefully. */
3069 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3070 "cpp_interpret_string_1 failed");
/* The expansion is followed by two further padding tokens. */
3072 tok = test.get_token ();
3073 ASSERT_EQ (tok->type, CPP_PADDING);
3075 tok = test.get_token ();
3076 ASSERT_EQ (tok->type, CPP_PADDING);
3079 /* Ensure that we fail gracefully if something attempts to pass
3080 in a location that isn't a string literal token. Seen on this code:
3082 const char a[] = " %d ";
3083 __builtin_printf (a, 0.5);
3086 when c-format.c erroneously used the indicated one-character
3087 location as the format string location, leading to a read past the
3088 end of a string buffer in cpp_interpret_string_1. */
3090 static void
3091 test_lexer_string_locations_non_string (const line_table_case &case_)
3093 /* .....................000000000111111111122222222223.
3094 .....................123456789012345678901234567890. */
3095 const char *content = (" a\n");
3096 lexer_test test (case_, content, NULL);
3098 /* Verify that we get the expected token back: the identifier "a". */
3099 const cpp_token *tok = test.get_token ();
3100 ASSERT_EQ (tok->type, CPP_NAME);
3101 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3103 /* At this point, libcpp is attempting to interpret the name as a
3104 string literal, despite it not starting with a quote. We don't detect
3105 that, but we should at least fail gracefully. */
3106 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3107 "cpp_interpret_string_1 failed");
3110 /* Ensure that we can read substring information for a token which
3111 starts in one linemap and ends in another. Adapted from
3112 gcc.dg/cpp/pr69985.c. */
3114 static void
3115 test_lexer_string_locations_long_line (const line_table_case &case_)
3117 /* .....................000000.0001111111111
3118 .....................123456.7890123456789. */
/* NOTE(review): the checks below expect the first digit of the string at
   column 7 of line 2; confirm that the leading whitespace in CONTENT
   agrees with the column ruler above. */
3119 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3120 " \"0123456789012345678901234567890123456789"
3121 "0123456789012345678901234567890123456789"
3122 "0123456789012345678901234567890123456789"
3123 "0123456789\"\n");
3125 lexer_test test (case_, content, NULL);
3127 /* Verify that we get the expected token back. */
3128 const cpp_token *tok = test.get_token ();
3129 ASSERT_EQ (tok->type, CPP_STRING);
/* Skip the per-character checks when the highest location in the line
   table is not expected to carry column data. */
3131 if (!should_have_column_data_p (line_table->highest_location))
3132 return;
3134 /* Verify ranges of individual characters.  The string holds
   40 + 40 + 40 + 10 = 130 digits, all on line 2. */
3135 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 130);
3136 for (int i = 0; i < 130; i++)
3137 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3138 i, 2, 7 + i, 7 + i);
3141 /* Test of lexing char constants: an unprefixed constant, the u, U and L
   prefixed forms, and a multi-character constant.  Each token's type and
   spelling is checked; only the first is also evaluated with
   cpp_interpret_charconst. */
3143 static void
3144 test_lexer_char_constants (const line_table_case &case_)
3146 /* Various char constants.
3147 .....................0000000001111111111.22222222223.
3148 .....................1234567890123456789.01234567890. */
3149 const char *content = (" 'a'\n"
3150 " u'a'\n"
3151 " U'a'\n"
3152 " L'a'\n"
3153 " 'abc'\n");
3154 lexer_test test (case_, content, NULL);
3156 /* Verify that we get the expected tokens back. */
3157 /* 'a'. */
3158 const cpp_token *tok = test.get_token ();
3159 ASSERT_EQ (tok->type, CPP_CHAR);
3160 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
/* Interpret the constant: it should have the value 'a' and consist of a
   single character.  UNSIGNEDP is written by the call but not checked. */
3162 unsigned int chars_seen;
3163 int unsignedp;
3164 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3165 &chars_seen, &unsignedp);
3166 ASSERT_EQ (cc, 'a');
3167 ASSERT_EQ (chars_seen, 1);
3169 /* u'a'. */
3170 tok = test.get_token ();
3171 ASSERT_EQ (tok->type, CPP_CHAR16);
3172 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3174 /* U'a'. */
3175 tok = test.get_token ();
3176 ASSERT_EQ (tok->type, CPP_CHAR32);
3177 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3179 /* L'a'. */
3180 tok = test.get_token ();
3181 ASSERT_EQ (tok->type, CPP_WCHAR);
3182 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3184 /* 'abc' (c-char-sequence). */
3185 tok = test.get_token ();
3186 ASSERT_EQ (tok->type, CPP_CHAR);
3187 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3189 /* A table of interesting location_t values, giving one axis of our test
3190 matrix.  Each value is used by for_each_line_table_case as the starting
   location of a line_table_case.
   NOTE(review): for_each_line_table_case asserts that this table yields 12
   cases per default_range_bits setting; confirm that the zero entry
   described by the first comment below is present, since it is not
   visible among the initializers here. */
3192 static const location_t boundary_locations[] = {
3193 /* Zero means "don't override the default values for a new line_table". */
3196 /* An arbitrary non-zero value that isn't close to one of
3197 the boundary values below. */
3198 0x10000,
3200 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES: a little below,
   immediately below, exactly at, immediately above and a little above. */
3201 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3202 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3203 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3204 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3205 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3207 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS, following the same
   pattern. */
3208 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3209 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3210 LINE_MAP_MAX_LOCATION_WITH_COLS,
3211 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3212 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3215 /* Run TESTCASE multiple times, once for each case in our test matrix.
   The matrix is the product of two default_range_bits settings (0 and 5)
   and every entry of boundary_locations; TESTCASE receives a
   line_table_case constructed from each pair. */
3217 void
3218 for_each_line_table_case (void (*testcase) (const line_table_case &))
3220 /* As noted above in the description of struct line_table_case,
3221 we want to explore a test matrix of interesting line_table
3222 situations, running various selftests for each case within the
3223 matrix. */
3225 /* Run all tests with:
3226 (a) line_table->default_range_bits == 0, and
3227 (b) line_table->default_range_bits == 5. */
3228 int num_cases_tested = 0;
/* Stepping by 5 makes the loop visit exactly the values 0 and 5. */
3229 for (int default_range_bits = 0; default_range_bits <= 5;
3230 default_range_bits += 5)
3232 /* ...and use each of the "interesting" location values as
3233 the starting location within line_table. */
3234 const int num_boundary_locations
3235 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3236 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3238 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3240 testcase (c);
3242 num_cases_tested++;
3246 /* Verify that we fully covered the test matrix: 2 range-bits settings
   times 12 boundary locations.  The 12 is hard-coded, so it must be kept
   in step with the size of boundary_locations. */
3247 ASSERT_EQ (num_cases_tested, 2 * 12);
3250 /* Run all of the selftests within this file.  Tests taking a
   line_table_case are run through for_each_line_table_case, i.e. once per
   entry of the test matrix; the others are called directly. */
3252 void
3253 input_c_tests ()
/* Tests called directly, followed by the first matrix-driven test. */
3255 test_should_have_column_data_p ();
3256 test_unknown_location ();
3257 test_builtins ();
3258 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
/* Linemap and lexer tests, including the string-location tests, each run
   over the whole test matrix. */
3260 for_each_line_table_case (test_accessing_ordinary_linemaps);
3261 for_each_line_table_case (test_lexer);
3262 for_each_line_table_case (test_lexer_string_locations_simple);
3263 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3264 for_each_line_table_case (test_lexer_string_locations_hex);
3265 for_each_line_table_case (test_lexer_string_locations_oct);
3266 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3267 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3268 for_each_line_table_case (test_lexer_string_locations_ucn4);
3269 for_each_line_table_case (test_lexer_string_locations_ucn8);
3270 for_each_line_table_case (test_lexer_string_locations_wide_string);
3271 for_each_line_table_case (test_lexer_string_locations_string16);
3272 for_each_line_table_case (test_lexer_string_locations_string32);
3273 for_each_line_table_case (test_lexer_string_locations_u8);
3274 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3275 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3276 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3277 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3278 for_each_line_table_case (test_lexer_string_locations_macro);
3279 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3280 for_each_line_table_case (test_lexer_string_locations_non_string);
3281 for_each_line_table_case (test_lexer_string_locations_long_line);
3282 for_each_line_table_case (test_lexer_char_constants);
/* Finally, a test that is called directly rather than per matrix entry. */
3284 test_reading_source_line ();
3287 } // namespace selftest
3289 #endif /* CHECKING_P */