Default to dwarf version 4 on hppa64-hpux
[official-gcc.git] / gcc / input.c
blobdd753decfa0d3fa132998700f813f68a9a5f36c8
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2021 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic.h"
25 #include "selftest.h"
26 #include "cpplib.h"
28 #ifndef HAVE_ICONV
29 #define HAVE_ICONV 0
30 #endif
/* Input charset configuration.  */

/* Default input-charset callback.  It ignores the file path it is
   given and always returns a null pointer; file_cache_slot::create
   only performs a charset conversion when the callback returns a
   non-null charset name.  */

static const char *
default_charset_callback (const char *)
{
  return nullptr;
}
38 void
39 file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
40 bool should_skip_bom)
42 in_context.ccb = (ccb ? ccb : default_charset_callback);
43 in_context.should_skip_bom = should_skip_bom;
46 /* This is a cache used by get_next_line to store the content of a
47 file to be searched for file lines. */
48 class file_cache_slot
50 public:
51 file_cache_slot ();
52 ~file_cache_slot ();
54 bool read_line_num (size_t line_num,
55 char ** line, ssize_t *line_len);
57 /* Accessors. */
58 const char *get_file_path () const { return m_file_path; }
59 unsigned get_use_count () const { return m_use_count; }
60 bool missing_trailing_newline_p () const
62 return m_missing_trailing_newline;
65 void inc_use_count () { m_use_count++; }
67 bool create (const file_cache::input_context &in_context,
68 const char *file_path, FILE *fp, unsigned highest_use_count);
69 void evict ();
71 private:
72 /* These are information used to store a line boundary. */
73 class line_info
75 public:
76 /* The line number. It starts from 1. */
77 size_t line_num;
79 /* The position (byte count) of the beginning of the line,
80 relative to the file data pointer. This starts at zero. */
81 size_t start_pos;
83 /* The position (byte count) of the last byte of the line. This
84 normally points to the '\n' character, or to one byte after the
85 last byte of the file, if the file doesn't contain a '\n'
86 character. */
87 size_t end_pos;
89 line_info (size_t l, size_t s, size_t e)
90 : line_num (l), start_pos (s), end_pos (e)
93 line_info ()
94 :line_num (0), start_pos (0), end_pos (0)
98 bool needs_read_p () const;
99 bool needs_grow_p () const;
100 void maybe_grow ();
101 bool read_data ();
102 bool maybe_read_data ();
103 bool get_next_line (char **line, ssize_t *line_len);
104 bool read_next_line (char ** line, ssize_t *line_len);
105 bool goto_next_line ();
107 static const size_t buffer_size = 4 * 1024;
108 static const size_t line_record_size = 100;
110 /* The number of time this file has been accessed. This is used
111 to designate which file cache to evict from the cache
112 array. */
113 unsigned m_use_count;
115 /* The file_path is the key for identifying a particular file in
116 the cache.
117 For libcpp-using code, the underlying buffer for this field is
118 owned by the corresponding _cpp_file within the cpp_reader. */
119 const char *m_file_path;
121 FILE *m_fp;
123 /* This points to the content of the file that we've read so
124 far. */
125 char *m_data;
127 /* The allocated buffer to be freed may start a little earlier than DATA,
128 e.g. if a UTF8 BOM was skipped at the beginning. */
129 int m_alloc_offset;
131 /* The size of the DATA array above.*/
132 size_t m_size;
134 /* The number of bytes read from the underlying file so far. This
135 must be less (or equal) than SIZE above. */
136 size_t m_nb_read;
138 /* The index of the beginning of the current line. */
139 size_t m_line_start_idx;
141 /* The number of the previous line read. This starts at 1. Zero
142 means we've read no line so far. */
143 size_t m_line_num;
145 /* This is the total number of lines of the current file. At the
146 moment, we try to get this information from the line map
147 subsystem. Note that this is just a hint. When using the C++
148 front-end, this hint is correct because the input file is then
149 completely tokenized before parsing starts; so the line map knows
150 the number of lines before compilation really starts. For e.g,
151 the C front-end, it can happen that we start emitting diagnostics
152 before the line map has seen the end of the file. */
153 size_t m_total_lines;
155 /* Could this file be missing a trailing newline on its final line?
156 Initially true (to cope with empty files), set to true/false
157 as each line is read. */
158 bool m_missing_trailing_newline;
160 /* This is a record of the beginning and end of the lines we've seen
161 while reading the file. This is useful to avoid walking the data
162 from the beginning when we are asked to read a line that is
163 before LINE_START_IDX above. Note that the maximum size of this
164 record is line_record_size, so that the memory consumption
165 doesn't explode. We thus scale total_lines down to
166 line_record_size. */
167 vec<line_info, va_heap> m_line_record;
169 void offset_buffer (int offset)
171 gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
172 : (size_t) offset <= m_size);
173 gcc_assert (m_data);
174 m_alloc_offset += offset;
175 m_data += offset;
176 m_size -= offset;
181 /* Current position in real source file. */
183 location_t input_location = UNKNOWN_LOCATION;
185 class line_maps *line_table;
187 /* A stashed copy of "line_table" for use by selftest::line_table_test.
188 This needs to be a global so that it can be a GC root, and thus
189 prevent the stashed copy from being garbage-collected if the GC runs
190 during a line_table_test. */
192 class line_maps *saved_line_table;
194 /* Expand the source location LOC into a human readable location. If
195 LOC resolves to a builtin location, the file name of the readable
196 location is set to the string "<built-in>". If EXPANSION_POINT_P is
197 TRUE and LOC is virtual, then it is resolved to the expansion
198 point of the involved macro. Otherwise, it is resolved to the
199 spelling location of the token.
201 When resolving to the spelling location of the token, if the
202 resulting location is for a built-in location (that is, it has no
203 associated line/column) in the context of a macro expansion, the
204 returned location is the first one (while unwinding the macro
205 location towards its expansion point) that is in real source
206 code.
208 ASPECT controls which part of the location to use. */
210 static expanded_location
211 expand_location_1 (location_t loc,
212 bool expansion_point_p,
213 enum location_aspect aspect)
215 expanded_location xloc;
216 const line_map_ordinary *map;
217 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
218 tree block = NULL;
220 if (IS_ADHOC_LOC (loc))
222 block = LOCATION_BLOCK (loc);
223 loc = LOCATION_LOCUS (loc);
226 memset (&xloc, 0, sizeof (xloc));
228 if (loc >= RESERVED_LOCATION_COUNT)
230 if (!expansion_point_p)
232 /* We want to resolve LOC to its spelling location.
234 But if that spelling location is a reserved location that
235 appears in the context of a macro expansion (like for a
236 location for a built-in token), let's consider the first
237 location (toward the expansion point) that is not reserved;
238 that is, the first location that is in real source code. */
239 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
240 loc, NULL);
241 lrk = LRK_SPELLING_LOCATION;
243 loc = linemap_resolve_location (line_table, loc, lrk, &map);
245 /* loc is now either in an ordinary map, or is a reserved location.
246 If it is a compound location, the caret is in a spelling location,
247 but the start/finish might still be a virtual location.
248 Depending of what the caller asked for, we may need to recurse
249 one level in order to resolve any virtual locations in the
250 end-points. */
251 switch (aspect)
253 default:
254 gcc_unreachable ();
255 /* Fall through. */
256 case LOCATION_ASPECT_CARET:
257 break;
258 case LOCATION_ASPECT_START:
260 location_t start = get_start (loc);
261 if (start != loc)
262 return expand_location_1 (start, expansion_point_p, aspect);
264 break;
265 case LOCATION_ASPECT_FINISH:
267 location_t finish = get_finish (loc);
268 if (finish != loc)
269 return expand_location_1 (finish, expansion_point_p, aspect);
271 break;
273 xloc = linemap_expand_location (line_table, map, loc);
276 xloc.data = block;
277 if (loc <= BUILTINS_LOCATION)
278 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
280 return xloc;
283 /* Initialize the set of cache used for files accessed by caret
284 diagnostic. */
286 static void
287 diagnostic_file_cache_init (void)
289 gcc_assert (global_dc);
290 if (global_dc->m_file_cache == NULL)
291 global_dc->m_file_cache = new file_cache ();
294 /* Free the resources used by the set of cache used for files accessed
295 by caret diagnostic. */
297 void
298 diagnostic_file_cache_fini (void)
300 if (global_dc->m_file_cache)
302 delete global_dc->m_file_cache;
303 global_dc->m_file_cache = NULL;
307 /* Return the total lines number that have been read so far by the
308 line map (in the preprocessor) so far. For languages like C++ that
309 entirely preprocess the input file before starting to parse, this
310 equals the actual number of lines of the file. */
312 static size_t
313 total_lines_num (const char *file_path)
315 size_t r = 0;
316 location_t l = 0;
317 if (linemap_get_file_highest_location (line_table, file_path, &l))
319 gcc_assert (l >= RESERVED_LOCATION_COUNT);
320 expanded_location xloc = expand_location (l);
321 r = xloc.line;
323 return r;
326 /* Lookup the cache used for the content of a given file accessed by
327 caret diagnostic. Return the found cached file, or NULL if no
328 cached file was found. */
330 file_cache_slot *
331 file_cache::lookup_file (const char *file_path)
333 gcc_assert (file_path);
335 /* This will contain the found cached file. */
336 file_cache_slot *r = NULL;
337 for (unsigned i = 0; i < num_file_slots; ++i)
339 file_cache_slot *c = &m_file_slots[i];
340 if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
342 c->inc_use_count ();
343 r = c;
347 if (r)
348 r->inc_use_count ();
350 return r;
353 /* Purge any mention of FILENAME from the cache of files used for
354 printing source code. For use in selftests when working
355 with tempfiles. */
357 void
358 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
360 gcc_assert (file_path);
362 if (!global_dc->m_file_cache)
363 return;
365 global_dc->m_file_cache->forcibly_evict_file (file_path);
368 void
369 file_cache::forcibly_evict_file (const char *file_path)
371 gcc_assert (file_path);
373 file_cache_slot *r = lookup_file (file_path);
374 if (!r)
375 /* Not found. */
376 return;
378 r->evict ();
381 void
382 file_cache_slot::evict ()
384 m_file_path = NULL;
385 if (m_fp)
386 fclose (m_fp);
387 m_fp = NULL;
388 m_nb_read = 0;
389 m_line_start_idx = 0;
390 m_line_num = 0;
391 m_line_record.truncate (0);
392 m_use_count = 0;
393 m_total_lines = 0;
394 m_missing_trailing_newline = true;
397 /* Return the file cache that has been less used, recently, or the
398 first empty one. If HIGHEST_USE_COUNT is non-null,
399 *HIGHEST_USE_COUNT is set to the highest use count of the entries
400 in the cache table. */
402 file_cache_slot*
403 file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
405 diagnostic_file_cache_init ();
407 file_cache_slot *to_evict = &m_file_slots[0];
408 unsigned huc = to_evict->get_use_count ();
409 for (unsigned i = 1; i < num_file_slots; ++i)
411 file_cache_slot *c = &m_file_slots[i];
412 bool c_is_empty = (c->get_file_path () == NULL);
414 if (c->get_use_count () < to_evict->get_use_count ()
415 || (to_evict->get_file_path () && c_is_empty))
416 /* We evict C because it's either an entry with a lower use
417 count or one that is empty. */
418 to_evict = c;
420 if (huc < c->get_use_count ())
421 huc = c->get_use_count ();
423 if (c_is_empty)
424 /* We've reached the end of the cache; subsequent elements are
425 all empty. */
426 break;
429 if (highest_use_count)
430 *highest_use_count = huc;
432 return to_evict;
435 /* Create the cache used for the content of a given file to be
436 accessed by caret diagnostic. This cache is added to an array of
437 cache and can be retrieved by lookup_file_in_cache_tab. This
438 function returns the created cache. Note that only the last
439 num_file_slots files are cached. */
441 file_cache_slot*
442 file_cache::add_file (const char *file_path)
445 FILE *fp = fopen (file_path, "r");
446 if (fp == NULL)
447 return NULL;
449 unsigned highest_use_count = 0;
450 file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
451 if (!r->create (in_context, file_path, fp, highest_use_count))
452 return NULL;
453 return r;
456 /* Populate this slot for use on FILE_PATH and FP, dropping any
457 existing cached content within it. */
459 bool
460 file_cache_slot::create (const file_cache::input_context &in_context,
461 const char *file_path, FILE *fp,
462 unsigned highest_use_count)
464 m_file_path = file_path;
465 if (m_fp)
466 fclose (m_fp);
467 m_fp = fp;
468 if (m_alloc_offset)
469 offset_buffer (-m_alloc_offset);
470 m_nb_read = 0;
471 m_line_start_idx = 0;
472 m_line_num = 0;
473 m_line_record.truncate (0);
474 /* Ensure that this cache entry doesn't get evicted next time
475 add_file_to_cache_tab is called. */
476 m_use_count = ++highest_use_count;
477 m_total_lines = total_lines_num (file_path);
478 m_missing_trailing_newline = true;
481 /* Check the input configuration to determine if we need to do any
482 transformations, such as charset conversion or BOM skipping. */
483 if (const char *input_charset = in_context.ccb (file_path))
485 /* Need a full-blown conversion of the input charset. */
486 fclose (m_fp);
487 m_fp = NULL;
488 const cpp_converted_source cs
489 = cpp_get_converted_source (file_path, input_charset);
490 if (!cs.data)
491 return false;
492 if (m_data)
493 XDELETEVEC (m_data);
494 m_data = cs.data;
495 m_nb_read = m_size = cs.len;
496 m_alloc_offset = cs.data - cs.to_free;
498 else if (in_context.should_skip_bom)
500 if (read_data ())
502 const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
503 offset_buffer (offset);
504 m_nb_read -= offset;
508 return true;
511 /* file_cache's ctor. */
513 file_cache::file_cache ()
514 : m_file_slots (new file_cache_slot[num_file_slots])
516 initialize_input_context (nullptr, false);
519 /* file_cache's dtor. */
521 file_cache::~file_cache ()
523 delete[] m_file_slots;
526 /* Lookup the cache used for the content of a given file accessed by
527 caret diagnostic. If no cached file was found, create a new cache
528 for this file, add it to the array of cached file and return
529 it. */
531 file_cache_slot*
532 file_cache::lookup_or_add_file (const char *file_path)
534 file_cache_slot *r = lookup_file (file_path);
535 if (r == NULL)
536 r = add_file (file_path);
537 return r;
540 /* Default constructor for a cache of file used by caret
541 diagnostic. */
543 file_cache_slot::file_cache_slot ()
544 : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
545 m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
546 m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true)
548 m_line_record.create (0);
551 /* Destructor for a cache of file used by caret diagnostic. */
553 file_cache_slot::~file_cache_slot ()
555 if (m_fp)
557 fclose (m_fp);
558 m_fp = NULL;
560 if (m_data)
562 offset_buffer (-m_alloc_offset);
563 XDELETEVEC (m_data);
564 m_data = 0;
566 m_line_record.release ();
569 /* Returns TRUE iff the cache would need to be filled with data coming
570 from the file. That is, either the cache is empty or full or the
571 current line is empty. Note that if the cache is full, it would
572 need to be extended and filled again. */
574 bool
575 file_cache_slot::needs_read_p () const
577 return m_fp && (m_nb_read == 0
578 || m_nb_read == m_size
579 || (m_line_start_idx >= m_nb_read - 1));
582 /* Return TRUE iff the cache is full and thus needs to be
583 extended. */
585 bool
586 file_cache_slot::needs_grow_p () const
588 return m_nb_read == m_size;
591 /* Grow the cache if it needs to be extended. */
593 void
594 file_cache_slot::maybe_grow ()
596 if (!needs_grow_p ())
597 return;
599 if (!m_data)
601 gcc_assert (m_size == 0 && m_alloc_offset == 0);
602 m_size = buffer_size;
603 m_data = XNEWVEC (char, m_size);
605 else
607 const int offset = m_alloc_offset;
608 offset_buffer (-offset);
609 m_size *= 2;
610 m_data = XRESIZEVEC (char, m_data, m_size);
611 offset_buffer (offset);
615 /* Read more data into the cache. Extends the cache if need be.
616 Returns TRUE iff new data could be read. */
618 bool
619 file_cache_slot::read_data ()
621 if (feof (m_fp) || ferror (m_fp))
622 return false;
624 maybe_grow ();
626 char * from = m_data + m_nb_read;
627 size_t to_read = m_size - m_nb_read;
628 size_t nb_read = fread (from, 1, to_read, m_fp);
630 if (ferror (m_fp))
631 return false;
633 m_nb_read += nb_read;
634 return !!nb_read;
637 /* Read new data iff the cache needs to be filled with more data
638 coming from the file FP. Return TRUE iff the cache was filled with
639 mode data. */
641 bool
642 file_cache_slot::maybe_read_data ()
644 if (!needs_read_p ())
645 return false;
646 return read_data ();
649 /* Read a new line from file FP, using C as a cache for the data
650 coming from the file. Upon successful completion, *LINE is set to
651 the beginning of the line found. *LINE points directly in the
652 line cache and is only valid until the next call of get_next_line.
653 *LINE_LEN is set to the length of the line. Note that the line
654 does not contain any terminal delimiter. This function returns
655 true if some data was read or process from the cache, false
656 otherwise. Note that subsequent calls to get_next_line might
657 make the content of *LINE invalid. */
659 bool
660 file_cache_slot::get_next_line (char **line, ssize_t *line_len)
662 /* Fill the cache with data to process. */
663 maybe_read_data ();
665 size_t remaining_size = m_nb_read - m_line_start_idx;
666 if (remaining_size == 0)
667 /* There is no more data to process. */
668 return false;
670 char *line_start = m_data + m_line_start_idx;
672 char *next_line_start = NULL;
673 size_t len = 0;
674 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
675 if (line_end == NULL)
677 /* We haven't found the end-of-line delimiter in the cache.
678 Fill the cache with more data from the file and look for the
679 '\n'. */
680 while (maybe_read_data ())
682 line_start = m_data + m_line_start_idx;
683 remaining_size = m_nb_read - m_line_start_idx;
684 line_end = (char *) memchr (line_start, '\n', remaining_size);
685 if (line_end != NULL)
687 next_line_start = line_end + 1;
688 break;
691 if (line_end == NULL)
693 /* We've loadded all the file into the cache and still no
694 '\n'. Let's say the line ends up at one byte passed the
695 end of the file. This is to stay consistent with the case
696 of when the line ends up with a '\n' and line_end points to
697 that terminal '\n'. That consistency is useful below in
698 the len calculation. */
699 line_end = m_data + m_nb_read ;
700 m_missing_trailing_newline = true;
702 else
703 m_missing_trailing_newline = false;
705 else
707 next_line_start = line_end + 1;
708 m_missing_trailing_newline = false;
711 if (m_fp && ferror (m_fp))
712 return false;
714 /* At this point, we've found the end of the of line. It either
715 points to the '\n' or to one byte after the last byte of the
716 file. */
717 gcc_assert (line_end != NULL);
719 len = line_end - line_start;
721 if (m_line_start_idx < m_nb_read)
722 *line = line_start;
724 ++m_line_num;
726 /* Before we update our line record, make sure the hint about the
727 total number of lines of the file is correct. If it's not, then
728 we give up recording line boundaries from now on. */
729 bool update_line_record = true;
730 if (m_line_num > m_total_lines)
731 update_line_record = false;
733 /* Now update our line record so that re-reading lines from the
734 before m_line_start_idx is faster. */
735 if (update_line_record
736 && m_line_record.length () < line_record_size)
738 /* If the file lines fits in the line record, we just record all
739 its lines ...*/
740 if (m_total_lines <= line_record_size
741 && m_line_num > m_line_record.length ())
742 m_line_record.safe_push
743 (file_cache_slot::line_info (m_line_num,
744 m_line_start_idx,
745 line_end - m_data));
746 else if (m_total_lines > line_record_size)
748 /* ... otherwise, we just scale total_lines down to
749 (line_record_size lines. */
750 size_t n = (m_line_num * line_record_size) / m_total_lines;
751 if (m_line_record.length () == 0
752 || n >= m_line_record.length ())
753 m_line_record.safe_push
754 (file_cache_slot::line_info (m_line_num,
755 m_line_start_idx,
756 line_end - m_data));
760 /* Update m_line_start_idx so that it points to the next line to be
761 read. */
762 if (next_line_start)
763 m_line_start_idx = next_line_start - m_data;
764 else
765 /* We didn't find any terminal '\n'. Let's consider that the end
766 of line is the end of the data in the cache. The next
767 invocation of get_next_line will either read more data from the
768 underlying file or return false early because we've reached the
769 end of the file. */
770 m_line_start_idx = m_nb_read;
772 *line_len = len;
774 return true;
777 /* Consume the next bytes coming from the cache (or from its
778 underlying file if there are remaining unread bytes in the file)
779 until we reach the next end-of-line (or end-of-file). There is no
780 copying from the cache involved. Return TRUE upon successful
781 completion. */
783 bool
784 file_cache_slot::goto_next_line ()
786 char *l;
787 ssize_t len;
789 return get_next_line (&l, &len);
792 /* Read an arbitrary line number LINE_NUM from the file cached in C.
793 If the line was read successfully, *LINE points to the beginning
794 of the line in the file cache and *LINE_LEN is the length of the
795 line. *LINE is not nul-terminated, but may contain zero bytes.
796 *LINE is only valid until the next call of read_line_num.
797 This function returns bool if a line was read. */
799 bool
800 file_cache_slot::read_line_num (size_t line_num,
801 char ** line, ssize_t *line_len)
803 gcc_assert (line_num > 0);
805 if (line_num <= m_line_num)
807 /* We've been asked to read lines that are before m_line_num.
808 So lets use our line record (if it's not empty) to try to
809 avoid re-reading the file from the beginning again. */
811 if (m_line_record.is_empty ())
813 m_line_start_idx = 0;
814 m_line_num = 0;
816 else
818 file_cache_slot::line_info *i = NULL;
819 if (m_total_lines <= line_record_size)
821 /* In languages where the input file is not totally
822 preprocessed up front, the m_total_lines hint
823 can be smaller than the number of lines of the
824 file. In that case, only the first
825 m_total_lines have been recorded.
827 Otherwise, the first m_total_lines we've read have
828 their start/end recorded here. */
829 i = (line_num <= m_total_lines)
830 ? &m_line_record[line_num - 1]
831 : &m_line_record[m_total_lines - 1];
832 gcc_assert (i->line_num <= line_num);
834 else
836 /* So the file had more lines than our line record
837 size. Thus the number of lines we've recorded has
838 been scaled down to line_record_size. Let's
839 pick the start/end of the recorded line that is
840 closest to line_num. */
841 size_t n = (line_num <= m_total_lines)
842 ? line_num * line_record_size / m_total_lines
843 : m_line_record.length () - 1;
844 if (n < m_line_record.length ())
846 i = &m_line_record[n];
847 gcc_assert (i->line_num <= line_num);
851 if (i && i->line_num == line_num)
853 /* We have the start/end of the line. */
854 *line = m_data + i->start_pos;
855 *line_len = i->end_pos - i->start_pos;
856 return true;
859 if (i)
861 m_line_start_idx = i->start_pos;
862 m_line_num = i->line_num - 1;
864 else
866 m_line_start_idx = 0;
867 m_line_num = 0;
872 /* Let's walk from line m_line_num up to line_num - 1, without
873 copying any line. */
874 while (m_line_num < line_num - 1)
875 if (!goto_next_line ())
876 return false;
878 /* The line we want is the next one. Let's read and copy it back to
879 the caller. */
880 return get_next_line (line, line_len);
883 /* Return the physical source line that corresponds to FILE_PATH/LINE.
884 The line is not nul-terminated. The returned pointer is only
885 valid until the next call of location_get_source_line.
886 Note that the line can contain several null characters,
887 so the returned value's length has the actual length of the line.
888 If the function fails, a NULL char_span is returned. */
890 char_span
891 location_get_source_line (const char *file_path, int line)
893 char *buffer = NULL;
894 ssize_t len;
896 if (line == 0)
897 return char_span (NULL, 0);
899 if (file_path == NULL)
900 return char_span (NULL, 0);
902 diagnostic_file_cache_init ();
904 file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
905 if (c == NULL)
906 return char_span (NULL, 0);
908 bool read = c->read_line_num (line, &buffer, &len);
909 if (!read)
910 return char_span (NULL, 0);
912 return char_span (buffer, len);
915 /* Determine if FILE_PATH missing a trailing newline on its final line.
916 Only valid to call once all of the file has been loaded, by
917 requesting a line number beyond the end of the file. */
919 bool
920 location_missing_trailing_newline (const char *file_path)
922 diagnostic_file_cache_init ();
924 file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
925 if (c == NULL)
926 return false;
928 return c->missing_trailing_newline_p ();
931 /* Test if the location originates from the spelling location of a
932 builtin-tokens. That is, return TRUE if LOC is a (possibly
933 virtual) location of a built-in token that appears in the expansion
934 list of a macro. Please note that this function also works on
935 tokens that result from built-in tokens. For instance, the
936 function would return true if passed a token "4" that is the result
937 of the expansion of the built-in __LINE__ macro. */
938 bool
939 is_location_from_builtin_token (location_t loc)
941 const line_map_ordinary *map = NULL;
942 loc = linemap_resolve_location (line_table, loc,
943 LRK_SPELLING_LOCATION, &map);
944 return loc == BUILTINS_LOCATION;
947 /* Expand the source location LOC into a human readable location. If
948 LOC is virtual, it resolves to the expansion point of the involved
949 macro. If LOC resolves to a builtin location, the file name of the
950 readable location is set to the string "<built-in>". */
952 expanded_location
953 expand_location (location_t loc)
955 return expand_location_1 (loc, /*expansion_point_p=*/true,
956 LOCATION_ASPECT_CARET);
959 /* Expand the source location LOC into a human readable location. If
960 LOC is virtual, it resolves to the expansion location of the
961 relevant macro. If LOC resolves to a builtin location, the file
962 name of the readable location is set to the string
963 "<built-in>". */
965 expanded_location
966 expand_location_to_spelling_point (location_t loc,
967 enum location_aspect aspect)
969 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
972 /* The rich_location class within libcpp requires a way to expand
973 location_t instances, and relies on the client code
974 providing a symbol named
975 linemap_client_expand_location_to_spelling_point
976 to do this.
978 This is the implementation for libcommon.a (all host binaries),
979 which simply calls into expand_location_1. */
981 expanded_location
982 linemap_client_expand_location_to_spelling_point (location_t loc,
983 enum location_aspect aspect)
985 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
989 /* If LOCATION is in a system header and if it is a virtual location for
990 a token coming from the expansion of a macro, unwind it to the
991 location of the expansion point of the macro. Otherwise, just return
992 LOCATION.
994 This is used for instance when we want to emit diagnostics about a
995 token that may be located in a macro that is itself defined in a
996 system header, for example, for the NULL macro. In such a case, if
997 LOCATION were passed directly to diagnostic functions such as
998 warning_at, the diagnostic would be suppressed (unless
999 -Wsystem-headers). */
1001 location_t
1002 expansion_point_location_if_in_system_header (location_t location)
1004 if (in_system_header_at (location))
1005 location = linemap_resolve_location (line_table, location,
1006 LRK_MACRO_EXPANSION_POINT,
1007 NULL);
1008 return location;
1011 /* If LOCATION is a virtual location for a token coming from the expansion
1012 of a macro, unwind to the location of the expansion point of the macro. */
1014 location_t
1015 expansion_point_location (location_t location)
1017 return linemap_resolve_location (line_table, location,
1018 LRK_MACRO_EXPANSION_POINT, NULL);
1021 /* Construct a location with caret at CARET, ranging from START to
1022 finish e.g.
1024 11111111112
1025 12345678901234567890
1027 523 return foo + bar;
1028 ~~~~^~~~~
1031 The location's caret is at the "+", line 523 column 15, but starts
1032 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
1033 of "bar" at column 19. */
1035 location_t
1036 make_location (location_t caret, location_t start, location_t finish)
1038 location_t pure_loc = get_pure_location (caret);
1039 source_range src_range;
1040 src_range.m_start = get_start (start);
1041 src_range.m_finish = get_finish (finish);
1042 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
1043 pure_loc,
1044 src_range,
1045 NULL);
1046 return combined_loc;
1049 /* Same as above, but taking a source range rather than two locations. */
1051 location_t
1052 make_location (location_t caret, source_range src_range)
1054 location_t pure_loc = get_pure_location (caret);
1055 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
1058 /* An expanded_location stores the column in byte units. This function
1059 converts that column to display units. That requires reading the associated
1060 source line in order to calculate the display width. If that cannot be done
1061 for any reason, then returns the byte column as a fallback. */
1063 location_compute_display_column (expanded_location exploc, int tabstop)
1065 if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
1066 return exploc.column;
1067 char_span line = location_get_source_line (exploc.file, exploc.line);
1068 /* If line is NULL, this function returns exploc.column which is the
1069 desired fallback. */
1070 return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
1071 exploc.column, tabstop);
1074 /* Dump statistics to stderr about the memory usage of the line_table
1075 set of line maps. This also displays some statistics about macro
1076 expansion. */
1078 void
1079 dump_line_table_statistics (void)
1081 struct linemap_stats s;
1082 long total_used_map_size,
1083 macro_maps_size,
1084 total_allocated_map_size;
1086 memset (&s, 0, sizeof (s));
1088 linemap_get_statistics (line_table, &s);
1090 macro_maps_size = s.macro_maps_used_size
1091 + s.macro_maps_locations_size;
1093 total_allocated_map_size = s.ordinary_maps_allocated_size
1094 + s.macro_maps_allocated_size
1095 + s.macro_maps_locations_size;
1097 total_used_map_size = s.ordinary_maps_used_size
1098 + s.macro_maps_used_size
1099 + s.macro_maps_locations_size;
1101 fprintf (stderr, "Number of expanded macros: %5ld\n",
1102 s.num_expanded_macros);
1103 if (s.num_expanded_macros != 0)
1104 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
1105 s.num_macro_tokens / s.num_expanded_macros);
1106 fprintf (stderr,
1107 "\nLine Table allocations during the "
1108 "compilation process\n");
1109 fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
1110 SIZE_AMOUNT (s.num_ordinary_maps_used));
1111 fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
1112 SIZE_AMOUNT (s.ordinary_maps_used_size));
1113 fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
1114 SIZE_AMOUNT (s.num_ordinary_maps_allocated));
1115 fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
1116 SIZE_AMOUNT (s.ordinary_maps_allocated_size));
1117 fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
1118 SIZE_AMOUNT (s.num_macro_maps_used));
1119 fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
1120 SIZE_AMOUNT (s.macro_maps_used_size));
1121 fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
1122 SIZE_AMOUNT (s.macro_maps_locations_size));
1123 fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
1124 SIZE_AMOUNT (macro_maps_size));
1125 fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
1126 SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
1127 fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
1128 SIZE_AMOUNT (total_allocated_map_size));
1129 fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
1130 SIZE_AMOUNT (total_used_map_size));
1131 fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
1132 SIZE_AMOUNT (s.adhoc_table_size));
1133 fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
1134 SIZE_AMOUNT (s.adhoc_table_entries_used));
1135 fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
1136 SIZE_AMOUNT (line_table->num_optimized_ranges));
1137 fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
1138 SIZE_AMOUNT (line_table->num_unoptimized_ranges));
1140 fprintf (stderr, "\n");
1143 /* Get location one beyond the final location in ordinary map IDX. */
1145 static location_t
1146 get_end_location (class line_maps *set, unsigned int idx)
1148 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1149 return set->highest_location;
1151 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1152 return MAP_START_LOCATION (next_map);
/* Helper function for write_digit_row.
   Write to STREAM the single character '0' + (DIGIT % 10), i.e. the
   decimal units digit of DIGIT when DIGIT is non-negative.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1163 /* Helper function for dump_location_info.
1164 Write a row of numbers to STREAM, numbering a source line,
1165 giving the units, tens, hundreds etc of the column number. */
1167 static void
1168 write_digit_row (FILE *stream, int indent,
1169 const line_map_ordinary *map,
1170 location_t loc, int max_col, int divisor)
1172 fprintf (stream, "%*c", indent, ' ');
1173 fprintf (stream, "|");
1174 for (int column = 1; column < max_col; column++)
1176 location_t column_loc = loc + (column << map->m_range_bits);
1177 write_digit (stream, column_loc / divisor);
1179 fprintf (stream, "\n");
1182 /* Write a half-closed (START) / half-open (END) interval of
1183 location_t to STREAM. */
1185 static void
1186 dump_location_range (FILE *stream,
1187 location_t start, location_t end)
1189 fprintf (stream,
1190 " location_t interval: %u <= loc < %u\n",
1191 start, end);
1194 /* Write a labelled description of a half-closed (START) / half-open (END)
1195 interval of location_t to STREAM. */
1197 static void
1198 dump_labelled_location_range (FILE *stream,
1199 const char *name,
1200 location_t start, location_t end)
1202 fprintf (stream, "%s\n", name);
1203 dump_location_range (stream, start, end);
1204 fprintf (stream, "\n");
1207 /* Write a visualization of the locations in the line_table to STREAM. */
1209 void
1210 dump_location_info (FILE *stream)
1212 /* Visualize the reserved locations. */
1213 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1214 0, RESERVED_LOCATION_COUNT);
1216 /* Visualize the ordinary line_map instances, rendering the sources. */
1217 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1219 location_t end_location = get_end_location (line_table, idx);
1220 /* half-closed: doesn't include this one. */
1222 const line_map_ordinary *map
1223 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1224 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1225 dump_location_range (stream,
1226 MAP_START_LOCATION (map), end_location);
1227 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1228 fprintf (stream, " starting at line: %i\n",
1229 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1230 fprintf (stream, " column and range bits: %i\n",
1231 map->m_column_and_range_bits);
1232 fprintf (stream, " column bits: %i\n",
1233 map->m_column_and_range_bits - map->m_range_bits);
1234 fprintf (stream, " range bits: %i\n",
1235 map->m_range_bits);
1236 const char * reason;
1237 switch (map->reason) {
1238 case LC_ENTER:
1239 reason = "LC_ENTER";
1240 break;
1241 case LC_LEAVE:
1242 reason = "LC_LEAVE";
1243 break;
1244 case LC_RENAME:
1245 reason = "LC_RENAME";
1246 break;
1247 case LC_RENAME_VERBATIM:
1248 reason = "LC_RENAME_VERBATIM";
1249 break;
1250 case LC_ENTER_MACRO:
1251 reason = "LC_RENAME_MACRO";
1252 break;
1253 default:
1254 reason = "Unknown";
1256 fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
1258 const line_map_ordinary *includer_map
1259 = linemap_included_from_linemap (line_table, map);
1260 fprintf (stream, " included from location: %d",
1261 linemap_included_from (map));
1262 if (includer_map) {
1263 fprintf (stream, " (in ordinary map %d)",
1264 int (includer_map - line_table->info_ordinary.maps));
1266 fprintf (stream, "\n");
1268 /* Render the span of source lines that this "map" covers. */
1269 for (location_t loc = MAP_START_LOCATION (map);
1270 loc < end_location;
1271 loc += (1 << map->m_range_bits) )
1273 gcc_assert (pure_location_p (line_table, loc) );
1275 expanded_location exploc
1276 = linemap_expand_location (line_table, map, loc);
1278 if (exploc.column == 0)
1280 /* Beginning of a new source line: draw the line. */
1282 char_span line_text = location_get_source_line (exploc.file,
1283 exploc.line);
1284 if (!line_text)
1285 break;
1286 fprintf (stream,
1287 "%s:%3i|loc:%5i|%.*s\n",
1288 exploc.file, exploc.line,
1289 loc,
1290 (int)line_text.length (), line_text.get_buffer ());
1292 /* "loc" is at column 0, which means "the whole line".
1293 Render the locations *within* the line, by underlining
1294 it, showing the location_t numeric values
1295 at each column. */
1296 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1297 if (max_col > line_text.length ())
1298 max_col = line_text.length () + 1;
1300 int len_lnum = num_digits (exploc.line);
1301 if (len_lnum < 3)
1302 len_lnum = 3;
1303 int len_loc = num_digits (loc);
1304 if (len_loc < 5)
1305 len_loc = 5;
1307 int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1309 /* Thousands. */
1310 if (end_location > 999)
1311 write_digit_row (stream, indent, map, loc, max_col, 1000);
1313 /* Hundreds. */
1314 if (end_location > 99)
1315 write_digit_row (stream, indent, map, loc, max_col, 100);
1317 /* Tens. */
1318 write_digit_row (stream, indent, map, loc, max_col, 10);
1320 /* Units. */
1321 write_digit_row (stream, indent, map, loc, max_col, 1);
1324 fprintf (stream, "\n");
1327 /* Visualize unallocated values. */
1328 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1329 line_table->highest_location,
1330 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1332 /* Visualize the macro line_map instances, rendering the sources. */
1333 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1335 /* Each macro map that is allocated owns location_t values
1336 that are *lower* that the one before them.
1337 Hence it's meaningful to view them either in order of ascending
1338 source locations, or in order of ascending macro map index. */
1339 const bool ascending_location_ts = true;
1340 unsigned int idx = (ascending_location_ts
1341 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1342 : i);
1343 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1344 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1345 idx,
1346 linemap_map_get_macro_name (map),
1347 MACRO_MAP_NUM_MACRO_TOKENS (map));
1348 dump_location_range (stream,
1349 map->start_location,
1350 (map->start_location
1351 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1352 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1353 "expansion point is location %i",
1354 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1355 fprintf (stream, " map->start_location: %u\n",
1356 map->start_location);
1358 fprintf (stream, " macro_locations:\n");
1359 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1361 location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1362 location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1364 /* linemap_add_macro_token encodes token numbers in an expansion
1365 by putting them after MAP_START_LOCATION. */
1367 /* I'm typically seeing 4 uninitialized entries at the end of
1368 0xafafafaf.
1369 This appears to be due to macro.c:replace_args
1370 adding 2 extra args for padding tokens; presumably there may
1371 be a leading and/or trailing padding token injected,
1372 each for 2 more location slots.
1373 This would explain there being up to 4 location_ts slots
1374 that may be uninitialized. */
1376 fprintf (stream, " %u: %u, %u\n",
1380 if (x == y)
1382 if (x < MAP_START_LOCATION (map))
1383 inform (x, "token %u has %<x-location == y-location == %u%>",
1384 i, x);
1385 else
1386 fprintf (stream,
1387 "x-location == y-location == %u encodes token # %u\n",
1388 x, x - MAP_START_LOCATION (map));
1390 else
1392 inform (x, "token %u has %<x-location == %u%>", i, x);
1393 inform (x, "token %u has %<y-location == %u%>", i, y);
1396 fprintf (stream, "\n");
1399 /* It appears that MAX_LOCATION_T itself is never assigned to a
1400 macro map, presumably due to an off-by-one error somewhere
1401 between the logic in linemap_enter_macro and
1402 LINEMAPS_MACRO_LOWEST_LOCATION. */
1403 dump_labelled_location_range (stream, "MAX_LOCATION_T",
1404 MAX_LOCATION_T,
1405 MAX_LOCATION_T + 1);
1407 /* Visualize ad-hoc values. */
1408 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1409 MAX_LOCATION_T + 1, UINT_MAX);
1412 /* string_concat's constructor. */
1414 string_concat::string_concat (int num, location_t *locs)
1415 : m_num (num)
1417 m_locs = ggc_vec_alloc <location_t> (num);
1418 for (int i = 0; i < num; i++)
1419 m_locs[i] = locs[i];
1422 /* string_concat_db's constructor. */
1424 string_concat_db::string_concat_db ()
1426 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1429 /* Record that a string concatenation occurred, covering NUM
1430 string literal tokens. LOCS is an array of size NUM, containing the
1431 locations of the tokens. A copy of LOCS is taken. */
1433 void
1434 string_concat_db::record_string_concatenation (int num, location_t *locs)
1436 gcc_assert (num > 1);
1437 gcc_assert (locs);
1439 location_t key_loc = get_key_loc (locs[0]);
1440 /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:
1441 any data now recorded under key 'key_loc' would be overwritten by a
1442 subsequent call with the same key 'key_loc'. */
1443 if (RESERVED_LOCATION_P (key_loc))
1444 return;
1446 string_concat *concat
1447 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1448 m_table->put (key_loc, concat);
1451 /* Determine if LOC was the location of the initial token of a
1452 concatenation of string literal tokens.
1453 If so, *OUT_NUM is written to with the number of tokens, and
1454 *OUT_LOCS with the location of an array of locations of the
1455 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1456 storage owned by the string_concat_db.
1457 Otherwise, return false. */
1459 bool
1460 string_concat_db::get_string_concatenation (location_t loc,
1461 int *out_num,
1462 location_t **out_locs)
1464 gcc_assert (out_num);
1465 gcc_assert (out_locs);
1467 location_t key_loc = get_key_loc (loc);
1468 /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see
1469 discussion in 'string_concat_db::record_string_concatenation'. */
1470 if (RESERVED_LOCATION_P (key_loc))
1471 return false;
1473 string_concat **concat = m_table->get (key_loc);
1474 if (!concat)
1475 return false;
1477 *out_num = (*concat)->m_num;
1478 *out_locs =(*concat)->m_locs;
1479 return true;
1482 /* Internal function. Canonicalize LOC into a form suitable for
1483 use as a key within the database, stripping away macro expansion,
1484 ad-hoc information, and range information, using the location of
1485 the start of LOC within an ordinary linemap. */
1487 location_t
1488 string_concat_db::get_key_loc (location_t loc)
1490 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1491 NULL);
1493 loc = get_range_from_loc (line_table, loc).m_start;
1495 return loc;
1498 /* Helper class for use within get_substring_ranges_for_loc.
1499 An vec of cpp_string with responsibility for releasing all of the
1500 str->text for each str in the vector. */
1502 class auto_cpp_string_vec : public auto_vec <cpp_string>
1504 public:
1505 auto_cpp_string_vec (int alloc)
1506 : auto_vec <cpp_string> (alloc) {}
1508 ~auto_cpp_string_vec ()
1510 /* Clean up the copies within this vec. */
1511 int i;
1512 cpp_string *str;
1513 FOR_EACH_VEC_ELT (*this, i, str)
1514 free (const_cast <unsigned char *> (str->text));
1518 /* Attempt to populate RANGES with source location information on the
1519 individual characters within the string literal found at STRLOC.
1520 If CONCATS is non-NULL, then any string literals that the token at
1521 STRLOC was concatenated with are also added to RANGES.
1523 Return NULL if successful, or an error message if any errors occurred (in
1524 which case RANGES may be only partially populated and should not
1525 be used).
1527 This is implemented by re-parsing the relevant source line(s). */
1529 static const char *
1530 get_substring_ranges_for_loc (cpp_reader *pfile,
1531 string_concat_db *concats,
1532 location_t strloc,
1533 enum cpp_ttype type,
1534 cpp_substring_ranges &ranges)
1536 gcc_assert (pfile);
1538 if (strloc == UNKNOWN_LOCATION)
1539 return "unknown location";
1541 /* Reparsing the strings requires accurate location information.
1542 If -ftrack-macro-expansion has been overridden from its default
1543 of 2, then we might have a location of a macro expansion point,
1544 rather than the location of the literal itself.
1545 Avoid this by requiring that we have full macro expansion tracking
1546 for substring locations to be available. */
1547 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1548 return "track_macro_expansion != 2";
1550 /* If #line or # 44 "file"-style directives are present, then there's
1551 no guarantee that the line numbers we have can be used to locate
1552 the strings. For example, we might have a .i file with # directives
1553 pointing back to lines within a .c file, but the .c file might
1554 have been edited since the .i file was created.
1555 In such a case, the safest course is to disable on-demand substring
1556 locations. */
1557 if (line_table->seen_line_directive)
1558 return "seen line directive";
1560 /* If string concatenation has occurred at STRLOC, get the locations
1561 of all of the literal tokens making up the compound string.
1562 Otherwise, just use STRLOC. */
1563 int num_locs = 1;
1564 location_t *strlocs = &strloc;
1565 if (concats)
1566 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1568 auto_cpp_string_vec strs (num_locs);
1569 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1570 for (int i = 0; i < num_locs; i++)
1572 /* Get range of strloc. We will use it to locate the start and finish
1573 of the literal token within the line. */
1574 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1576 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1578 /* If the string token was within a macro expansion, then we can
1579 cope with it for the simple case where we have a single token.
1580 Otherwise, bail out. */
1581 if (src_range.m_start != src_range.m_finish)
1582 return "macro expansion";
1584 else
1586 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1587 /* If so, we can't reliably determine where the token started within
1588 its line. */
1589 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1591 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1592 /* If so, we can't reliably determine where the token finished
1593 within its line. */
1594 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1597 expanded_location start
1598 = expand_location_to_spelling_point (src_range.m_start,
1599 LOCATION_ASPECT_START);
1600 expanded_location finish
1601 = expand_location_to_spelling_point (src_range.m_finish,
1602 LOCATION_ASPECT_FINISH);
1603 if (start.file != finish.file)
1604 return "range endpoints are in different files";
1605 if (start.line != finish.line)
1606 return "range endpoints are on different lines";
1607 if (start.column > finish.column)
1608 return "range endpoints are reversed";
1610 char_span line = location_get_source_line (start.file, start.line);
1611 if (!line)
1612 return "unable to read source line";
1614 /* Determine the location of the literal (including quotes
1615 and leading prefix chars, such as the 'u' in a u""
1616 token). */
1617 size_t literal_length = finish.column - start.column + 1;
1619 /* Ensure that we don't crash if we got the wrong location. */
1620 if (start.column < 1)
1621 return "zero start column";
1622 if (line.length () < (start.column - 1 + literal_length))
1623 return "line is not wide enough";
1625 char_span literal = line.subspan (start.column - 1, literal_length);
1627 cpp_string from;
1628 from.len = literal_length;
1629 /* Make a copy of the literal, to avoid having to rely on
1630 the lifetime of the copy of the line within the cache.
1631 This will be released by the auto_cpp_string_vec dtor. */
1632 from.text = (unsigned char *)literal.xstrdup ();
1633 strs.safe_push (from);
1635 /* For very long lines, a new linemap could have started
1636 halfway through the token.
1637 Ensure that the loc_reader uses the linemap of the
1638 *end* of the token for its start location. */
1639 const line_map_ordinary *start_ord_map;
1640 linemap_resolve_location (line_table, src_range.m_start,
1641 LRK_SPELLING_LOCATION, &start_ord_map);
1642 const line_map_ordinary *final_ord_map;
1643 linemap_resolve_location (line_table, src_range.m_finish,
1644 LRK_SPELLING_LOCATION, &final_ord_map);
1645 if (start_ord_map == NULL || final_ord_map == NULL)
1646 return "failed to get ordinary maps";
1647 /* Bulletproofing. We ought to only have different ordinary maps
1648 for start vs finish due to line-length jumps. */
1649 if (start_ord_map != final_ord_map
1650 && start_ord_map->to_file != final_ord_map->to_file)
1651 return "start and finish are spelled in different ordinary maps";
1652 /* The file from linemap_resolve_location ought to match that from
1653 expand_location_to_spelling_point. */
1654 if (start_ord_map->to_file != start.file)
1655 return "mismatching file after resolving linemap";
1657 location_t start_loc
1658 = linemap_position_for_line_and_column (line_table, final_ord_map,
1659 start.line, start.column);
1661 cpp_string_location_reader loc_reader (start_loc, line_table);
1662 loc_readers.safe_push (loc_reader);
1665 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1666 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1667 loc_readers.address (),
1668 num_locs, &ranges, type);
1669 if (err)
1670 return err;
1672 /* Success: "ranges" should now contain information on the string. */
1673 return NULL;
1676 /* Attempt to populate *OUT_LOC with source location information on the
1677 given characters within the string literal found at STRLOC.
1678 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1679 character set.
1681 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1682 and string literal "012345\n789"
1683 *OUT_LOC is written to with:
1684 "012345\n789"
1685 ~^~~~~
1687 If CONCATS is non-NULL, then any string literals that the token at
1688 STRLOC was concatenated with are also considered.
1690 This is implemented by re-parsing the relevant source line(s).
1692 Return NULL if successful, or an error message if any errors occurred.
1693 Error messages are intended for GCC developers (to help debugging) rather
1694 than for end-users. */
1696 const char *
1697 get_location_within_string (cpp_reader *pfile,
1698 string_concat_db *concats,
1699 location_t strloc,
1700 enum cpp_ttype type,
1701 int caret_idx, int start_idx, int end_idx,
1702 location_t *out_loc)
1704 gcc_checking_assert (caret_idx >= 0);
1705 gcc_checking_assert (start_idx >= 0);
1706 gcc_checking_assert (end_idx >= 0);
1707 gcc_assert (out_loc);
1709 cpp_substring_ranges ranges;
1710 const char *err
1711 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1712 if (err)
1713 return err;
1715 if (caret_idx >= ranges.get_num_ranges ())
1716 return "caret_idx out of range";
1717 if (start_idx >= ranges.get_num_ranges ())
1718 return "start_idx out of range";
1719 if (end_idx >= ranges.get_num_ranges ())
1720 return "end_idx out of range";
1722 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1723 ranges.get_range (start_idx).m_start,
1724 ranges.get_range (end_idx).m_finish);
1725 return NULL;
1728 #if CHECKING_P
1730 namespace selftest {
1732 /* Selftests of location handling. */
1734 /* Attempt to populate *OUT_RANGE with source location information on the
1735 given character within the string literal found at STRLOC.
1736 CHAR_IDX refers to an offset within the execution character set.
1737 If CONCATS is non-NULL, then any string literals that the token at
1738 STRLOC was concatenated with are also considered.
1740 This is implemented by re-parsing the relevant source line(s).
1742 Return NULL if successful, or an error message if any errors occurred.
1743 Error messages are intended for GCC developers (to help debugging) rather
1744 than for end-users. */
1746 static const char *
1747 get_source_range_for_char (cpp_reader *pfile,
1748 string_concat_db *concats,
1749 location_t strloc,
1750 enum cpp_ttype type,
1751 int char_idx,
1752 source_range *out_range)
1754 gcc_checking_assert (char_idx >= 0);
1755 gcc_assert (out_range);
1757 cpp_substring_ranges ranges;
1758 const char *err
1759 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1760 if (err)
1761 return err;
1763 if (char_idx >= ranges.get_num_ranges ())
1764 return "char_idx out of range";
1766 *out_range = ranges.get_range (char_idx);
1767 return NULL;
1770 /* As get_source_range_for_char, but write to *OUT the number
1771 of ranges that are available. */
1773 static const char *
1774 get_num_source_ranges_for_substring (cpp_reader *pfile,
1775 string_concat_db *concats,
1776 location_t strloc,
1777 enum cpp_ttype type,
1778 int *out)
1780 gcc_assert (out);
1782 cpp_substring_ranges ranges;
1783 const char *err
1784 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1786 if (err)
1787 return err;
1789 *out = ranges.get_num_ranges ();
1790 return NULL;
1793 /* Selftests of location handling. */
1795 /* Verify that compare() on linenum_type handles comparisons over the full
1796 range of the type. */
1798 static void
1799 test_linenum_comparisons ()
1801 linenum_type min_line (0);
1802 linenum_type max_line (0xffffffff);
1803 ASSERT_EQ (0, compare (min_line, min_line));
1804 ASSERT_EQ (0, compare (max_line, max_line));
1806 ASSERT_GT (compare (max_line, min_line), 0);
1807 ASSERT_LT (compare (min_line, max_line), 0);
1810 /* Helper function for verifying location data: when location_t
1811 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1812 as having column 0. */
1814 static bool
1815 should_have_column_data_p (location_t loc)
1817 if (IS_ADHOC_LOC (loc))
1818 loc = get_location_from_adhoc_loc (line_table, loc);
1819 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1820 return false;
1821 return true;
1824 /* Selftest for should_have_column_data_p. */
1826 static void
1827 test_should_have_column_data_p ()
1829 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1830 ASSERT_TRUE
1831 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1832 ASSERT_FALSE
1833 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1836 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1837 on LOC. */
1839 static void
1840 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1841 location_t loc)
1843 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1844 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1845 /* If location_t values are sufficiently high, then column numbers
1846 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1847 When close to the threshold, column numbers *may* be present: if
1848 the final linemap before the threshold contains a line that straddles
1849 the threshold, locations in that line have column information. */
1850 if (should_have_column_data_p (loc))
1851 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for location_t beyond which line-map.c changes
     behavior (disabling of the range-packing optimization, disabling
     of column-tracking).  We can exercise these by starting the line_table
     at interesting values at or near these thresholds.

   The following class describes a particular case within our test
   matrix.  */

class line_table_case
{
 public:
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* Value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the value at which the line_table's highest_location
     and highest_line start out.  */
  int m_base_location;
};
1882 /* Constructor. Store the old value of line_table, and create a new
1883 one, using sane defaults. */
1885 line_table_test::line_table_test ()
1887 gcc_assert (saved_line_table == NULL);
1888 saved_line_table = line_table;
1889 line_table = ggc_alloc<line_maps> ();
1890 linemap_init (line_table, BUILTINS_LOCATION);
1891 gcc_assert (saved_line_table->reallocator);
1892 line_table->reallocator = saved_line_table->reallocator;
1893 gcc_assert (saved_line_table->round_alloc_size);
1894 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1895 line_table->default_range_bits = 0;
1898 /* Constructor. Store the old value of line_table, and create a new
1899 one, using the sitation described in CASE_. */
1901 line_table_test::line_table_test (const line_table_case &case_)
1903 gcc_assert (saved_line_table == NULL);
1904 saved_line_table = line_table;
1905 line_table = ggc_alloc<line_maps> ();
1906 linemap_init (line_table, BUILTINS_LOCATION);
1907 gcc_assert (saved_line_table->reallocator);
1908 line_table->reallocator = saved_line_table->reallocator;
1909 gcc_assert (saved_line_table->round_alloc_size);
1910 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1911 line_table->default_range_bits = case_.m_default_range_bits;
1912 if (case_.m_base_location)
1914 line_table->highest_location = case_.m_base_location;
1915 line_table->highest_line = case_.m_base_location;
1919 /* Destructor. Restore the old value of line_table. */
1921 line_table_test::~line_table_test ()
1923 gcc_assert (saved_line_table != NULL);
1924 line_table = saved_line_table;
1925 saved_line_table = NULL;
1928 /* Verify basic operation of ordinary linemaps. */
1930 static void
1931 test_accessing_ordinary_linemaps (const line_table_case &case_)
1933 line_table_test ltt (case_);
1935 /* Build a simple linemap describing some locations. */
1936 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1938 linemap_line_start (line_table, 1, 100);
1939 location_t loc_a = linemap_position_for_column (line_table, 1);
1940 location_t loc_b = linemap_position_for_column (line_table, 23);
1942 linemap_line_start (line_table, 2, 100);
1943 location_t loc_c = linemap_position_for_column (line_table, 1);
1944 location_t loc_d = linemap_position_for_column (line_table, 17);
1946 /* Example of a very long line. */
1947 linemap_line_start (line_table, 3, 2000);
1948 location_t loc_e = linemap_position_for_column (line_table, 700);
1950 /* Transitioning back to a short line. */
1951 linemap_line_start (line_table, 4, 0);
1952 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1954 if (should_have_column_data_p (loc_back_to_short))
1956 /* Verify that we switched to short lines in the linemap. */
1957 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1958 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1961 /* Example of a line that will eventually be seen to be longer
1962 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1963 below that. */
1964 linemap_line_start (line_table, 5, 2000);
1966 location_t loc_start_of_very_long_line
1967 = linemap_position_for_column (line_table, 2000);
1968 location_t loc_too_wide
1969 = linemap_position_for_column (line_table, 4097);
1970 location_t loc_too_wide_2
1971 = linemap_position_for_column (line_table, 4098);
1973 /* ...and back to a sane line length. */
1974 linemap_line_start (line_table, 6, 100);
1975 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1977 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1979 /* Multiple files. */
1980 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1981 linemap_line_start (line_table, 1, 200);
1982 location_t loc_f = linemap_position_for_column (line_table, 150);
1983 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1985 /* Verify that we can recover the location info. */
1986 assert_loceq ("foo.c", 1, 1, loc_a);
1987 assert_loceq ("foo.c", 1, 23, loc_b);
1988 assert_loceq ("foo.c", 2, 1, loc_c);
1989 assert_loceq ("foo.c", 2, 17, loc_d);
1990 assert_loceq ("foo.c", 3, 700, loc_e);
1991 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1993 /* In the very wide line, the initial location should be fully tracked. */
1994 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1995 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1996 be disabled. */
1997 assert_loceq ("foo.c", 5, 0, loc_too_wide);
1998 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1999 /*...and column-tracking should be re-enabled for subsequent lines. */
2000 assert_loceq ("foo.c", 6, 10, loc_sane_again);
2002 assert_loceq ("bar.c", 1, 150, loc_f);
2004 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
2005 ASSERT_TRUE (pure_location_p (line_table, loc_a));
2007 /* Verify using make_location to build a range, and extracting data
2008 back from it. */
2009 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
2010 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
2011 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
2012 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
2013 ASSERT_EQ (loc_b, src_range.m_start);
2014 ASSERT_EQ (loc_d, src_range.m_finish);
2017 /* Verify various properties of UNKNOWN_LOCATION. */
2019 static void
2020 test_unknown_location ()
2022 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
2023 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
2024 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
2027 /* Verify various properties of BUILTINS_LOCATION. */
2029 static void
2030 test_builtins ()
2032 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
2033 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
2036 /* Regression test for make_location.
2037 Ensure that we use pure locations for the start/finish of the range,
2038 rather than storing a packed or ad-hoc range as the start/finish. */
2040 static void
2041 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
2043 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
2044 with C++ frontend.
2045 ....................0000000001111111111222.
2046 ....................1234567890123456789012. */
2047 const char *content = " r += !aaa == bbb;\n";
2048 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
2049 line_table_test ltt (case_);
2050 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
2052 const location_t c11 = linemap_position_for_column (line_table, 11);
2053 const location_t c12 = linemap_position_for_column (line_table, 12);
2054 const location_t c13 = linemap_position_for_column (line_table, 13);
2055 const location_t c14 = linemap_position_for_column (line_table, 14);
2056 const location_t c21 = linemap_position_for_column (line_table, 21);
2058 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
2059 return;
2061 /* Use column 13 for the caret location, arbitrarily, to verify that we
2062 handle start != caret. */
2063 const location_t aaa = make_location (c13, c12, c14);
2064 ASSERT_EQ (c13, get_pure_location (aaa));
2065 ASSERT_EQ (c12, get_start (aaa));
2066 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
2067 ASSERT_EQ (c14, get_finish (aaa));
2068 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
2070 /* Make a location using a location with a range as the start-point. */
2071 const location_t not_aaa = make_location (c11, aaa, c14);
2072 ASSERT_EQ (c11, get_pure_location (not_aaa));
2073 /* It should use the start location of the range, not store the range
2074 itself. */
2075 ASSERT_EQ (c12, get_start (not_aaa));
2076 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
2077 ASSERT_EQ (c14, get_finish (not_aaa));
2078 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
2080 /* Similarly, make a location with a range as the end-point. */
2081 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
2082 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
2083 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
2084 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
2085 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
2086 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
2087 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
2088 /* It should use the finish location of the range, not store the range
2089 itself. */
2090 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
2091 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
2092 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
2093 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
2094 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
2097 /* Verify reading of input files (e.g. for caret-based diagnostics). */
2099 static void
2100 test_reading_source_line ()
2102 /* Create a tempfile and write some text to it. */
2103 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
2104 "01234567890123456789\n"
2105 "This is the test text\n"
2106 "This is the 3rd line");
2108 /* Read back a specific line from the tempfile. */
2109 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
2110 ASSERT_TRUE (source_line);
2111 ASSERT_TRUE (source_line.get_buffer () != NULL);
2112 ASSERT_EQ (20, source_line.length ());
2113 ASSERT_TRUE (!strncmp ("This is the 3rd line",
2114 source_line.get_buffer (), source_line.length ()));
2116 source_line = location_get_source_line (tmp.get_filename (), 2);
2117 ASSERT_TRUE (source_line);
2118 ASSERT_TRUE (source_line.get_buffer () != NULL);
2119 ASSERT_EQ (21, source_line.length ());
2120 ASSERT_TRUE (!strncmp ("This is the test text",
2121 source_line.get_buffer (), source_line.length ()));
2123 source_line = location_get_source_line (tmp.get_filename (), 4);
2124 ASSERT_FALSE (source_line);
2125 ASSERT_TRUE (source_line.get_buffer () == NULL);
/* Tests of lexing.  */

/* Assert that cpp_token_as_text, applied to token TOK of PARSER, yields
   a string equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)		\
  SELFTEST_BEGIN_STMT							\
    unsigned char *actual_txt;						\
    actual_txt = cpp_token_as_text ((PARSER), (TOK));			\
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);		\
  SELFTEST_END_STMT
2139 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
2140 and ranges from EXP_START_COL to EXP_FINISH_COL.
2141 Use LOC as the effective location of the selftest. */
2143 static void
2144 assert_token_loc_eq (const location &loc,
2145 const cpp_token *tok,
2146 const char *exp_filename, int exp_linenum,
2147 int exp_start_col, int exp_finish_col)
2149 location_t tok_loc = tok->src_loc;
2150 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
2151 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
2153 /* If location_t values are sufficiently high, then column numbers
2154 will be unavailable. */
2155 if (!should_have_column_data_p (tok_loc))
2156 return;
2158 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2159 source_range tok_range = get_range_from_loc (line_table, tok_loc);
2160 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2161 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
/* Convenience wrapper for assert_token_loc_eq: checks TOK->src_loc and
   passes SELFTEST_LOCATION as the effective location of the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,	\
			    EXP_START_COL, EXP_FINISH_COL)	\
  assert_token_loc_eq (SELFTEST_LOCATION,			\
		       (TOK),					\
		       (EXP_FILENAME),				\
		       (EXP_LINENUM),				\
		       (EXP_START_COL),				\
		       (EXP_FINISH_COL))
2172 /* Test of lexing a file using libcpp, verifying tokens and their
2173 location information. */
2175 static void
2176 test_lexer (const line_table_case &case_)
2178 /* Create a tempfile and write some text to it. */
2179 const char *content =
2180 /*00000000011111111112222222222333333.3333444444444.455555555556
2181 12345678901234567890123456789012345.6789012345678.901234567890. */
2182 ("test_name /* c-style comment */\n"
2183 " \"test literal\"\n"
2184 " // test c++-style comment\n"
2185 " 42\n");
2186 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2188 line_table_test ltt (case_);
2190 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2192 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2193 ASSERT_NE (fname, NULL);
2195 /* Verify that we get the expected tokens back, with the correct
2196 location information. */
2198 location_t loc;
2199 const cpp_token *tok;
2200 tok = cpp_get_token_with_location (parser, &loc);
2201 ASSERT_NE (tok, NULL);
2202 ASSERT_EQ (tok->type, CPP_NAME);
2203 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2204 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2206 tok = cpp_get_token_with_location (parser, &loc);
2207 ASSERT_NE (tok, NULL);
2208 ASSERT_EQ (tok->type, CPP_STRING);
2209 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2210 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2212 tok = cpp_get_token_with_location (parser, &loc);
2213 ASSERT_NE (tok, NULL);
2214 ASSERT_EQ (tok->type, CPP_NUMBER);
2215 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2216 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2218 tok = cpp_get_token_with_location (parser, &loc);
2219 ASSERT_NE (tok, NULL);
2220 ASSERT_EQ (tok->type, CPP_EOF);
2222 cpp_finish (parser, NULL);
2223 cpp_destroy (parser);
/* Forward decls.  */

class lexer_test;
class lexer_test_options;

/* A class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor.  */

class lexer_test_options
{
 public:
  /* Virtual, so that destroying a subclass through a pointer to this
     base class is well-defined.  */
  virtual ~lexer_test_options () {}

  /* Hook for subclasses to configure the given lexer_test.  */
  virtual void apply (lexer_test &) = 0;
};
2240 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2241 in its dtor.
2243 This is needed by struct lexer_test to ensure that the cleanup of the
2244 cpp_reader happens *after* the cleanup of the temp_source_file. */
2246 class cpp_reader_ptr
2248 public:
2249 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2251 ~cpp_reader_ptr ()
2253 cpp_finish (m_ptr, NULL);
2254 cpp_destroy (m_ptr);
2257 operator cpp_reader * () const { return m_ptr; }
2259 private:
2260 cpp_reader *m_ptr;
2263 /* A struct for writing lexer tests. */
2265 class lexer_test
2267 public:
2268 lexer_test (const line_table_case &case_, const char *content,
2269 lexer_test_options *options);
2270 ~lexer_test ();
2272 const cpp_token *get_token ();
2274 /* The ordering of these fields matters.
2275 The line_table_test must be first, since the cpp_reader_ptr
2276 uses it.
2277 The cpp_reader must be cleaned up *after* the temp_source_file
2278 since the filenames in input.c's input cache are owned by the
2279 cpp_reader; in particular, when ~temp_source_file evicts the
2280 filename the filenames must still be alive. */
2281 line_table_test m_ltt;
2282 cpp_reader_ptr m_parser;
2283 temp_source_file m_tempfile;
2284 string_concat_db m_concats;
2285 bool m_implicitly_expect_EOF;
2288 /* Use an EBCDIC encoding for the execution charset, specifically
2289 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2291 This exercises iconv integration within libcpp.
2292 Not every build of iconv supports the given charset,
2293 so we need to flag this error and handle it gracefully. */
2295 class ebcdic_execution_charset : public lexer_test_options
2297 public:
2298 ebcdic_execution_charset () : m_num_iconv_errors (0)
2300 gcc_assert (s_singleton == NULL);
2301 s_singleton = this;
2303 ~ebcdic_execution_charset ()
2305 gcc_assert (s_singleton == this);
2306 s_singleton = NULL;
2309 void apply (lexer_test &test) FINAL OVERRIDE
2311 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2312 cpp_opts->narrow_charset = "IBM1047";
2314 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2315 callbacks->diagnostic = on_diagnostic;
2318 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2319 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2320 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2321 rich_location *richloc ATTRIBUTE_UNUSED,
2322 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2323 ATTRIBUTE_FPTR_PRINTF(5,0)
2325 gcc_assert (s_singleton);
2326 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2327 const char *msg = "conversion from %s to %s not supported by iconv";
2328 #ifdef ENABLE_NLS
2329 msg = dgettext ("cpplib", msg);
2330 #endif
2331 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2332 when the local iconv build doesn't support the conversion. */
2333 if (strcmp (msgid, msg) == 0)
2335 s_singleton->m_num_iconv_errors++;
2336 return true;
2339 /* Otherwise, we have an unexpected error. */
2340 abort ();
2343 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2345 private:
2346 static ebcdic_execution_charset *s_singleton;
2347 int m_num_iconv_errors;
2350 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2352 /* A lexer_test_options subclass that records a list of diagnostic
2353 messages emitted by the lexer. */
2355 class lexer_diagnostic_sink : public lexer_test_options
2357 public:
2358 lexer_diagnostic_sink ()
2360 gcc_assert (s_singleton == NULL);
2361 s_singleton = this;
2363 ~lexer_diagnostic_sink ()
2365 gcc_assert (s_singleton == this);
2366 s_singleton = NULL;
2368 int i;
2369 char *str;
2370 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2371 free (str);
2374 void apply (lexer_test &test) FINAL OVERRIDE
2376 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2377 callbacks->diagnostic = on_diagnostic;
2380 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2381 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2382 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2383 rich_location *richloc ATTRIBUTE_UNUSED,
2384 const char *msgid, va_list *ap)
2385 ATTRIBUTE_FPTR_PRINTF(5,0)
2387 char *msg = xvasprintf (msgid, *ap);
2388 s_singleton->m_diagnostics.safe_push (msg);
2389 return true;
2392 auto_vec<char *> m_diagnostics;
2394 private:
2395 static lexer_diagnostic_sink *s_singleton;
2398 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2400 /* Constructor. Override line_table with a new instance based on CASE_,
2401 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2402 start parsing the tempfile. */
2404 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2405 lexer_test_options *options)
2406 : m_ltt (case_),
2407 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2408 /* Create a tempfile and write the text to it. */
2409 m_tempfile (SELFTEST_LOCATION, ".c", content),
2410 m_concats (),
2411 m_implicitly_expect_EOF (true)
2413 if (options)
2414 options->apply (*this);
2416 cpp_init_iconv (m_parser);
2418 /* Parse the file. */
2419 const char *fname = cpp_read_main_file (m_parser,
2420 m_tempfile.get_filename ());
2421 ASSERT_NE (fname, NULL);
2424 /* Destructor. By default, verify that the next token in m_parser is EOF. */
2426 lexer_test::~lexer_test ()
2428 location_t loc;
2429 const cpp_token *tok;
2431 if (m_implicitly_expect_EOF)
2433 tok = cpp_get_token_with_location (m_parser, &loc);
2434 ASSERT_NE (tok, NULL);
2435 ASSERT_EQ (tok->type, CPP_EOF);
2439 /* Get the next token from m_parser. */
2441 const cpp_token *
2442 lexer_test::get_token ()
2444 location_t loc;
2445 const cpp_token *tok;
2447 tok = cpp_get_token_with_location (m_parser, &loc);
2448 ASSERT_NE (tok, NULL);
2449 return tok;
2452 /* Verify that locations within string literals are correctly handled. */
2454 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2455 using the string concatenation database for TEST.
2457 Assert that the character at index IDX is on EXPECTED_LINE,
2458 and that it begins at column EXPECTED_START_COL and ends at
2459 EXPECTED_FINISH_COL (unless the locations are beyond
2460 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2461 columns). */
2463 static void
2464 assert_char_at_range (const location &loc,
2465 lexer_test& test,
2466 location_t strloc, enum cpp_ttype type, int idx,
2467 int expected_line, int expected_start_col,
2468 int expected_finish_col)
2470 cpp_reader *pfile = test.m_parser;
2471 string_concat_db *concats = &test.m_concats;
2473 source_range actual_range = source_range();
2474 const char *err
2475 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2476 &actual_range);
2477 if (should_have_column_data_p (strloc))
2478 ASSERT_EQ_AT (loc, NULL, err);
2479 else
2481 ASSERT_STREQ_AT (loc,
2482 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2483 err);
2484 return;
2487 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2488 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2489 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2490 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2492 if (should_have_column_data_p (actual_range.m_start))
2494 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2495 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2497 if (should_have_column_data_p (actual_range.m_finish))
2499 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2500 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
/* Convenience wrapper for assert_char_at_range that passes
   SELFTEST_LOCATION as the effective location of any errors.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
			     EXPECTED_START_COL, EXPECTED_FINISH_COL)	   \
  assert_char_at_range (SELFTEST_LOCATION,				   \
			(LEXER_TEST),					   \
			(STRLOC),					   \
			(TYPE),						   \
			(IDX),						   \
			(EXPECTED_LINE),				   \
			(EXPECTED_START_COL),				   \
			(EXPECTED_FINISH_COL))
2513 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2514 using the string concatenation database for TEST.
2516 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2518 static void
2519 assert_num_substring_ranges (const location &loc,
2520 lexer_test& test,
2521 location_t strloc,
2522 enum cpp_ttype type,
2523 int expected_num_ranges)
2525 cpp_reader *pfile = test.m_parser;
2526 string_concat_db *concats = &test.m_concats;
2528 int actual_num_ranges = -1;
2529 const char *err
2530 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2531 &actual_num_ranges);
2532 if (should_have_column_data_p (strloc))
2533 ASSERT_EQ_AT (loc, NULL, err);
2534 else
2536 ASSERT_STREQ_AT (loc,
2537 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2538 err);
2539 return;
2541 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
/* Convenience wrapper for assert_num_substring_ranges that passes
   SELFTEST_LOCATION as the effective location of any errors.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,	\
				    EXPECTED_NUM_RANGES)	\
  assert_num_substring_ranges (SELFTEST_LOCATION,		\
			       (LEXER_TEST),			\
			       (STRLOC),			\
			       (TYPE),				\
			       (EXPECTED_NUM_RANGES))
2553 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2554 returns an error (using the string concatenation database for TEST). */
2556 static void
2557 assert_has_no_substring_ranges (const location &loc,
2558 lexer_test& test,
2559 location_t strloc,
2560 enum cpp_ttype type,
2561 const char *expected_err)
2563 cpp_reader *pfile = test.m_parser;
2564 string_concat_db *concats = &test.m_concats;
2565 cpp_substring_ranges ranges;
2566 const char *actual_err
2567 = get_substring_ranges_for_loc (pfile, concats, strloc,
2568 type, ranges);
2569 if (should_have_column_data_p (strloc))
2570 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2571 else
2572 ASSERT_STREQ_AT (loc,
2573 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2574 actual_err);
/* Convenience wrapper for assert_has_no_substring_ranges that passes
   SELFTEST_LOCATION as the effective location of any errors.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)	\
  assert_has_no_substring_ranges (SELFTEST_LOCATION,			\
				  (LEXER_TEST),				\
				  (STRLOC),				\
				  (TYPE),				\
				  (ERR))
2581 /* Lex a simple string literal. Verify the substring location data, before
2582 and after running cpp_interpret_string on it. */
2584 static void
2585 test_lexer_string_locations_simple (const line_table_case &case_)
2587 /* Digits 0-9 (with 0 at column 10), the simple way.
2588 ....................000000000.11111111112.2222222223333333333
2589 ....................123456789.01234567890.1234567890123456789
2590 We add a trailing comment to ensure that we correctly locate
2591 the end of the string literal token. */
2592 const char *content = " \"0123456789\" /* not a string */\n";
2593 lexer_test test (case_, content, NULL);
2595 /* Verify that we get the expected token back, with the correct
2596 location information. */
2597 const cpp_token *tok = test.get_token ();
2598 ASSERT_EQ (tok->type, CPP_STRING);
2599 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2600 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2602 /* At this point in lexing, the quote characters are treated as part of
2603 the string (they are stripped off by cpp_interpret_string). */
2605 ASSERT_EQ (tok->val.str.len, 12);
2607 /* Verify that cpp_interpret_string works. */
2608 cpp_string dst_string;
2609 const enum cpp_ttype type = CPP_STRING;
2610 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2611 &dst_string, type);
2612 ASSERT_TRUE (result);
2613 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2614 free (const_cast <unsigned char *> (dst_string.text));
2616 /* Verify ranges of individual characters. This no longer includes the
2617 opening quote, but does include the closing quote. */
2618 for (int i = 0; i <= 10; i++)
2619 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2620 10 + i, 10 + i);
2622 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2625 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2626 encoding. */
2628 static void
2629 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2631 /* EBCDIC support requires iconv. */
2632 if (!HAVE_ICONV)
2633 return;
2635 /* Digits 0-9 (with 0 at column 10), the simple way.
2636 ....................000000000.11111111112.2222222223333333333
2637 ....................123456789.01234567890.1234567890123456789
2638 We add a trailing comment to ensure that we correctly locate
2639 the end of the string literal token. */
2640 const char *content = " \"0123456789\" /* not a string */\n";
2641 ebcdic_execution_charset use_ebcdic;
2642 lexer_test test (case_, content, &use_ebcdic);
2644 /* Verify that we get the expected token back, with the correct
2645 location information. */
2646 const cpp_token *tok = test.get_token ();
2647 ASSERT_EQ (tok->type, CPP_STRING);
2648 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2649 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2651 /* At this point in lexing, the quote characters are treated as part of
2652 the string (they are stripped off by cpp_interpret_string). */
2654 ASSERT_EQ (tok->val.str.len, 12);
2656 /* The remainder of the test requires an iconv implementation that
2657 can convert from UTF-8 to the EBCDIC encoding requested above. */
2658 if (use_ebcdic.iconv_errors_occurred_p ())
2659 return;
2661 /* Verify that cpp_interpret_string works. */
2662 cpp_string dst_string;
2663 const enum cpp_ttype type = CPP_STRING;
2664 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2665 &dst_string, type);
2666 ASSERT_TRUE (result);
2667 /* We should now have EBCDIC-encoded text, specifically
2668 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2669 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2670 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2671 (const char *)dst_string.text);
2672 free (const_cast <unsigned char *> (dst_string.text));
2674 /* Verify that we don't attempt to record substring location information
2675 for such cases. */
2676 ASSERT_HAS_NO_SUBSTRING_RANGES
2677 (test, tok->src_loc, type,
2678 "execution character set != source character set");
2681 /* Lex a string literal containing a hex-escaped character.
2682 Verify the substring location data, before and after running
2683 cpp_interpret_string on it. */
2685 static void
2686 test_lexer_string_locations_hex (const line_table_case &case_)
2688 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2689 and with a space in place of digit 6, to terminate the escaped
2690 hex code.
2691 ....................000000000.111111.11112222.
2692 ....................123456789.012345.67890123. */
2693 const char *content = " \"01234\\x35 789\"\n";
2694 lexer_test test (case_, content, NULL);
2696 /* Verify that we get the expected token back, with the correct
2697 location information. */
2698 const cpp_token *tok = test.get_token ();
2699 ASSERT_EQ (tok->type, CPP_STRING);
2700 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2701 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2703 /* At this point in lexing, the quote characters are treated as part of
2704 the string (they are stripped off by cpp_interpret_string). */
2705 ASSERT_EQ (tok->val.str.len, 15);
2707 /* Verify that cpp_interpret_string works. */
2708 cpp_string dst_string;
2709 const enum cpp_ttype type = CPP_STRING;
2710 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2711 &dst_string, type);
2712 ASSERT_TRUE (result);
2713 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2714 free (const_cast <unsigned char *> (dst_string.text));
2716 /* Verify ranges of individual characters. This no longer includes the
2717 opening quote, but does include the closing quote. */
2718 for (int i = 0; i <= 4; i++)
2719 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2720 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2721 for (int i = 6; i <= 10; i++)
2722 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2724 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2727 /* Lex a string literal containing an octal-escaped character.
2728 Verify the substring location data after running cpp_interpret_string
2729 on it. */
2731 static void
2732 test_lexer_string_locations_oct (const line_table_case &case_)
2734 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2735 and with a space in place of digit 6, to terminate the escaped
2736 octal code.
2737 ....................000000000.111111.11112222.2222223333333333444
2738 ....................123456789.012345.67890123.4567890123456789012 */
2739 const char *content = " \"01234\\065 789\" /* not a string */\n";
2740 lexer_test test (case_, content, NULL);
2742 /* Verify that we get the expected token back, with the correct
2743 location information. */
2744 const cpp_token *tok = test.get_token ();
2745 ASSERT_EQ (tok->type, CPP_STRING);
2746 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2748 /* Verify that cpp_interpret_string works. */
2749 cpp_string dst_string;
2750 const enum cpp_ttype type = CPP_STRING;
2751 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2752 &dst_string, type);
2753 ASSERT_TRUE (result);
2754 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2755 free (const_cast <unsigned char *> (dst_string.text));
2757 /* Verify ranges of individual characters. This no longer includes the
2758 opening quote, but does include the closing quote. */
2759 for (int i = 0; i < 5; i++)
2760 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2761 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2762 for (int i = 6; i <= 10; i++)
2763 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2765 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2768 /* Test of string literal containing letter escapes. */
2770 static void
2771 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2773 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2774 .....................000000000.1.11111.1.1.11222.22222223333333
2775 .....................123456789.0.12345.6.7.89012.34567890123456. */
2776 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2777 lexer_test test (case_, content, NULL);
2779 /* Verify that we get the expected tokens back. */
2780 const cpp_token *tok = test.get_token ();
2781 ASSERT_EQ (tok->type, CPP_STRING);
2782 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2784 /* Verify ranges of individual characters. */
2785 /* "\t". */
2786 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2787 0, 1, 10, 11);
2788 /* "foo". */
2789 for (int i = 1; i <= 3; i++)
2790 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2791 i, 1, 11 + i, 11 + i);
2792 /* "\\" and "\n". */
2793 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2794 4, 1, 15, 16);
2795 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2796 5, 1, 17, 18);
2798 /* "bar" and closing quote for nul-terminator. */
2799 for (int i = 6; i <= 9; i++)
2800 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2801 i, 1, 13 + i, 13 + i);
2803 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2806 /* Another test of a string literal containing a letter escape.
2807 Based on string seen in
2808 printf ("%-%\n");
2809 in gcc.dg/format/c90-printf-1.c. */
2811 static void
2812 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2814 /* .....................000000000.1111.11.1111.22222222223.
2815 .....................123456789.0123.45.6789.01234567890. */
2816 const char *content = (" \"%-%\\n\" /* non-str */\n");
2817 lexer_test test (case_, content, NULL);
2819 /* Verify that we get the expected tokens back. */
2820 const cpp_token *tok = test.get_token ();
2821 ASSERT_EQ (tok->type, CPP_STRING);
2822 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2824 /* Verify ranges of individual characters. */
2825 /* "%-%". */
2826 for (int i = 0; i < 3; i++)
2827 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2828 i, 1, 10 + i, 10 + i);
2829 /* "\n". */
2830 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2831 3, 1, 13, 14);
2833 /* Closing quote for nul-terminator. */
2834 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2835 4, 1, 15, 15);
2837 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2840 /* Lex a string literal containing UCN 4 characters.
2841 Verify the substring location data after running cpp_interpret_string
2842 on it. */
2844 static void
2845 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2847 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2848 as UCN 4.
2849 ....................000000000.111111.111122.222222223.33333333344444
2850 ....................123456789.012345.678901.234567890.12345678901234 */
2851 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2852 lexer_test test (case_, content, NULL);
2854 /* Verify that we get the expected token back, with the correct
2855 location information. */
2856 const cpp_token *tok = test.get_token ();
2857 ASSERT_EQ (tok->type, CPP_STRING);
2858 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2860 /* Verify that cpp_interpret_string works.
2861 The string should be encoded in the execution character
2862 set. Assuming that is UTF-8, we should have the following:
2863 ----------- ---- ----- ------- ----------------
2864 Byte offset Byte Octal Unicode Source Column(s)
2865 ----------- ---- ----- ------- ----------------
2866 0 0x30 '0' 10
2867 1 0x31 '1' 11
2868 2 0x32 '2' 12
2869 3 0x33 '3' 13
2870 4 0x34 '4' 14
2871 5 0xE2 \342 U+2174 15-20
2872 6 0x85 \205 (cont) 15-20
2873 7 0xB4 \264 (cont) 15-20
2874 8 0xE2 \342 U+2175 21-26
2875 9 0x85 \205 (cont) 21-26
2876 10 0xB5 \265 (cont) 21-26
2877 11 0x37 '7' 27
2878 12 0x38 '8' 28
2879 13 0x39 '9' 29
2880 14 0x00 30 (closing quote)
2881 ----------- ---- ----- ------- ---------------. */
2883 cpp_string dst_string;
2884 const enum cpp_ttype type = CPP_STRING;
2885 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2886 &dst_string, type);
2887 ASSERT_TRUE (result);
2888 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2889 (const char *)dst_string.text);
2890 free (const_cast <unsigned char *> (dst_string.text));
2892 /* Verify ranges of individual characters. This no longer includes the
2893 opening quote, but does include the closing quote.
2894 '01234'. */
2895 for (int i = 0; i <= 4; i++)
2896 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2897 /* U+2174. */
2898 for (int i = 5; i <= 7; i++)
2899 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2900 /* U+2175. */
2901 for (int i = 8; i <= 10; i++)
2902 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2903 /* '789' and nul terminator */
2904 for (int i = 11; i <= 14; i++)
2905 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2907 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2910 /* Lex a string literal containing UCN 8 characters.
2911 Verify the substring location data after running cpp_interpret_string
2912 on it. */
2914 static void
2915 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2917 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2918 ....................000000000.111111.1111222222.2222333333333.344444
2919 ....................123456789.012345.6789012345.6789012345678.901234 */
2920 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2921 lexer_test test (case_, content, NULL);
2923 /* Verify that we get the expected token back, with the correct
2924 location information. */
2925 const cpp_token *tok = test.get_token ();
2926 ASSERT_EQ (tok->type, CPP_STRING);
2927 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2928 "\"01234\\U00002174\\U00002175789\"");
2930 /* Verify that cpp_interpret_string works.
2931 The UTF-8 encoding of the string is identical to that from
2932 the ucn4 testcase above; the only difference is the column
2933 locations. */
2934 cpp_string dst_string;
2935 const enum cpp_ttype type = CPP_STRING;
2936 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2937 &dst_string, type);
2938 ASSERT_TRUE (result);
2939 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2940 (const char *)dst_string.text);
2941 free (const_cast <unsigned char *> (dst_string.text));
2943 /* Verify ranges of individual characters. This no longer includes the
2944 opening quote, but does include the closing quote.
2945 '01234'. */
2946 for (int i = 0; i <= 4; i++)
2947 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2948 /* U+2174. */
2949 for (int i = 5; i <= 7; i++)
2950 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2951 /* U+2175. */
2952 for (int i = 8; i <= 10; i++)
2953 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2954 /* '789' at columns 35-37 */
2955 for (int i = 11; i <= 13; i++)
2956 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2957 /* Closing quote/nul-terminator at column 38. */
2958 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2960 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
/* Read the four bytes at PTR_BE_VALUE as a big-endian (most significant
   byte first) 32-bit quantity and return it as a host-endian value.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes
    = reinterpret_cast <const unsigned char *> (ptr_be_value);

  /* Accumulate one byte at a time, most significant first.  */
  uint32_t value = 0;
  for (int idx = 0; idx < 4; idx++)
    value = (value << 8) | (uint32_t) bytes[idx];
  return value;
}
2975 /* Lex a wide string literal and verify that attempts to read substring
2976 location data from it fail gracefully. */
2978 static void
2979 test_lexer_string_locations_wide_string (const line_table_case &case_)
2981 /* Digits 0-9.
2982 ....................000000000.11111111112.22222222233333
2983 ....................123456789.01234567890.12345678901234 */
2984 const char *content = " L\"0123456789\" /* non-str */\n";
2985 lexer_test test (case_, content, NULL);
2987 /* Verify that we get the expected token back, with the correct
2988 location information. */
2989 const cpp_token *tok = test.get_token ();
2990 ASSERT_EQ (tok->type, CPP_WSTRING);
2991 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2993 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2994 cpp_string dst_string;
2995 const enum cpp_ttype type = CPP_WSTRING;
2996 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2997 &dst_string, type);
2998 ASSERT_TRUE (result);
2999 /* The cpp_reader defaults to big-endian with
3000 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
3001 now be encoded as UTF-32BE. */
3002 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3003 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3004 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3005 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3006 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3007 free (const_cast <unsigned char *> (dst_string.text));
3009 /* We don't yet support generating substring location information
3010 for L"" strings. */
3011 ASSERT_HAS_NO_SUBSTRING_RANGES
3012 (test, tok->src_loc, type,
3013 "execution character set != source character set");
/* Read the two bytes at PTR_BE_VALUE as a big-endian (most significant
   byte first) 16-bit quantity and return it as a host-endian value.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes
    = reinterpret_cast <const unsigned char *> (ptr_be_value);
  const unsigned int high_byte = bytes[0];
  const unsigned int low_byte = bytes[1];
  return static_cast <uint16_t> ((high_byte << 8) | low_byte);
}
3025 /* Lex a u"" string literal and verify that attempts to read substring
3026 location data from it fail gracefully. */
3028 static void
3029 test_lexer_string_locations_string16 (const line_table_case &case_)
3031 /* Digits 0-9.
3032 ....................000000000.11111111112.22222222233333
3033 ....................123456789.01234567890.12345678901234 */
3034 const char *content = " u\"0123456789\" /* non-str */\n";
3035 lexer_test test (case_, content, NULL);
3037 /* Verify that we get the expected token back, with the correct
3038 location information. */
3039 const cpp_token *tok = test.get_token ();
3040 ASSERT_EQ (tok->type, CPP_STRING16);
3041 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
3043 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
3044 cpp_string dst_string;
3045 const enum cpp_ttype type = CPP_STRING16;
3046 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3047 &dst_string, type);
3048 ASSERT_TRUE (result);
3050 /* The cpp_reader defaults to big-endian, so dst_string should
3051 now be encoded as UTF-16BE. */
3052 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
3053 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
3054 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
3055 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
3056 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
3057 free (const_cast <unsigned char *> (dst_string.text));
3059 /* We don't yet support generating substring location information
3060 for L"" strings. */
3061 ASSERT_HAS_NO_SUBSTRING_RANGES
3062 (test, tok->src_loc, type,
3063 "execution character set != source character set");
3066 /* Lex a U"" string literal and verify that attempts to read substring
3067 location data from it fail gracefully. */
3069 static void
3070 test_lexer_string_locations_string32 (const line_table_case &case_)
3072 /* Digits 0-9.
3073 ....................000000000.11111111112.22222222233333
3074 ....................123456789.01234567890.12345678901234 */
3075 const char *content = " U\"0123456789\" /* non-str */\n";
3076 lexer_test test (case_, content, NULL);
3078 /* Verify that we get the expected token back, with the correct
3079 location information. */
3080 const cpp_token *tok = test.get_token ();
3081 ASSERT_EQ (tok->type, CPP_STRING32);
3082 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
3084 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
3085 cpp_string dst_string;
3086 const enum cpp_ttype type = CPP_STRING32;
3087 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3088 &dst_string, type);
3089 ASSERT_TRUE (result);
3091 /* The cpp_reader defaults to big-endian, so dst_string should
3092 now be encoded as UTF-32BE. */
3093 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3094 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3095 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3096 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3097 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3098 free (const_cast <unsigned char *> (dst_string.text));
3100 /* We don't yet support generating substring location information
3101 for L"" strings. */
3102 ASSERT_HAS_NO_SUBSTRING_RANGES
3103 (test, tok->src_loc, type,
3104 "execution character set != source character set");
3107 /* Lex a u8-string literal.
3108 Verify the substring location data after running cpp_interpret_string
3109 on it. */
3111 static void
3112 test_lexer_string_locations_u8 (const line_table_case &case_)
3114 /* Digits 0-9.
3115 ....................000000000.11111111112.22222222233333
3116 ....................123456789.01234567890.12345678901234 */
3117 const char *content = " u8\"0123456789\" /* non-str */\n";
3118 lexer_test test (case_, content, NULL);
3120 /* Verify that we get the expected token back, with the correct
3121 location information. */
3122 const cpp_token *tok = test.get_token ();
3123 ASSERT_EQ (tok->type, CPP_UTF8STRING);
3124 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
3126 /* Verify that cpp_interpret_string works. */
3127 cpp_string dst_string;
3128 const enum cpp_ttype type = CPP_STRING;
3129 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3130 &dst_string, type);
3131 ASSERT_TRUE (result);
3132 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3133 free (const_cast <unsigned char *> (dst_string.text));
3135 /* Verify ranges of individual characters. This no longer includes the
3136 opening quote, but does include the closing quote. */
3137 for (int i = 0; i <= 10; i++)
3138 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3141 /* Lex a string literal containing UTF-8 source characters.
3142 Verify the substring location data after running cpp_interpret_string
3143 on it. */
3145 static void
3146 test_lexer_string_locations_utf8_source (const line_table_case &case_)
3148 /* This string literal is written out to the source file as UTF-8,
3149 and is of the form "before mojibake after", where "mojibake"
3150 is written as the following four unicode code points:
3151 U+6587 CJK UNIFIED IDEOGRAPH-6587
3152 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3153 U+5316 CJK UNIFIED IDEOGRAPH-5316
3154 U+3051 HIRAGANA LETTER KE.
3155 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3156 "before" and "after" are 1 byte per unicode character.
3158 The numbering shown are "columns", which are *byte* numbers within
3159 the line, rather than unicode character numbers.
3161 .................... 000000000.1111111.
3162 .................... 123456789.0123456. */
3163 const char *content = (" \"before "
3164 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3165 UTF-8: 0xE6 0x96 0x87
3166 C octal escaped UTF-8: \346\226\207
3167 "column" numbers: 17-19. */
3168 "\346\226\207"
3170 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3171 UTF-8: 0xE5 0xAD 0x97
3172 C octal escaped UTF-8: \345\255\227
3173 "column" numbers: 20-22. */
3174 "\345\255\227"
3176 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3177 UTF-8: 0xE5 0x8C 0x96
3178 C octal escaped UTF-8: \345\214\226
3179 "column" numbers: 23-25. */
3180 "\345\214\226"
3182 /* U+3051 HIRAGANA LETTER KE
3183 UTF-8: 0xE3 0x81 0x91
3184 C octal escaped UTF-8: \343\201\221
3185 "column" numbers: 26-28. */
3186 "\343\201\221"
3188 /* column numbers 29 onwards
3189 2333333.33334444444444
3190 9012345.67890123456789. */
3191 " after\" /* non-str */\n");
3192 lexer_test test (case_, content, NULL);
3194 /* Verify that we get the expected token back, with the correct
3195 location information. */
3196 const cpp_token *tok = test.get_token ();
3197 ASSERT_EQ (tok->type, CPP_STRING);
3198 ASSERT_TOKEN_AS_TEXT_EQ
3199 (test.m_parser, tok,
3200 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3202 /* Verify that cpp_interpret_string works. */
3203 cpp_string dst_string;
3204 const enum cpp_ttype type = CPP_STRING;
3205 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3206 &dst_string, type);
3207 ASSERT_TRUE (result);
3208 ASSERT_STREQ
3209 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3210 (const char *)dst_string.text);
3211 free (const_cast <unsigned char *> (dst_string.text));
3213 /* Verify ranges of individual characters. This no longer includes the
3214 opening quote, but does include the closing quote.
3215 Assuming that both source and execution encodings are UTF-8, we have
3216 a run of 25 octets in each, plus the NUL terminator. */
3217 for (int i = 0; i < 25; i++)
3218 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3219 /* NUL-terminator should use the closing quote at column 35. */
3220 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3222 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3225 /* Test of string literal concatenation. */
3227 static void
3228 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3230 /* Digits 0-9.
3231 .....................000000000.111111.11112222222222
3232 .....................123456789.012345.67890123456789. */
3233 const char *content = (" \"01234\" /* non-str */\n"
3234 " \"56789\" /* non-str */\n");
3235 lexer_test test (case_, content, NULL);
3237 location_t input_locs[2];
3239 /* Verify that we get the expected tokens back. */
3240 auto_vec <cpp_string> input_strings;
3241 const cpp_token *tok_a = test.get_token ();
3242 ASSERT_EQ (tok_a->type, CPP_STRING);
3243 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3244 input_strings.safe_push (tok_a->val.str);
3245 input_locs[0] = tok_a->src_loc;
3247 const cpp_token *tok_b = test.get_token ();
3248 ASSERT_EQ (tok_b->type, CPP_STRING);
3249 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3250 input_strings.safe_push (tok_b->val.str);
3251 input_locs[1] = tok_b->src_loc;
3253 /* Verify that cpp_interpret_string works. */
3254 cpp_string dst_string;
3255 const enum cpp_ttype type = CPP_STRING;
3256 bool result = cpp_interpret_string (test.m_parser,
3257 input_strings.address (), 2,
3258 &dst_string, type);
3259 ASSERT_TRUE (result);
3260 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3261 free (const_cast <unsigned char *> (dst_string.text));
3263 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3264 test.m_concats.record_string_concatenation (2, input_locs);
3266 location_t initial_loc = input_locs[0];
3268 /* "01234" on line 1. */
3269 for (int i = 0; i <= 4; i++)
3270 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3271 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3272 for (int i = 5; i <= 10; i++)
3273 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3275 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3278 /* Another test of string literal concatenation. */
3280 static void
3281 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3283 /* Digits 0-9.
3284 .....................000000000.111.11111112222222
3285 .....................123456789.012.34567890123456. */
3286 const char *content = (" \"01\" /* non-str */\n"
3287 " \"23\" /* non-str */\n"
3288 " \"45\" /* non-str */\n"
3289 " \"67\" /* non-str */\n"
3290 " \"89\" /* non-str */\n");
3291 lexer_test test (case_, content, NULL);
3293 auto_vec <cpp_string> input_strings;
3294 location_t input_locs[5];
3296 /* Verify that we get the expected tokens back. */
3297 for (int i = 0; i < 5; i++)
3299 const cpp_token *tok = test.get_token ();
3300 ASSERT_EQ (tok->type, CPP_STRING);
3301 input_strings.safe_push (tok->val.str);
3302 input_locs[i] = tok->src_loc;
3305 /* Verify that cpp_interpret_string works. */
3306 cpp_string dst_string;
3307 const enum cpp_ttype type = CPP_STRING;
3308 bool result = cpp_interpret_string (test.m_parser,
3309 input_strings.address (), 5,
3310 &dst_string, type);
3311 ASSERT_TRUE (result);
3312 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3313 free (const_cast <unsigned char *> (dst_string.text));
3315 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3316 test.m_concats.record_string_concatenation (5, input_locs);
3318 location_t initial_loc = input_locs[0];
3320 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3321 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3322 and expect get_source_range_for_substring to fail.
3323 However, for a string concatenation test, we can have a case
3324 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3325 but subsequent strings can be after it.
3326 Attempting to detect this within assert_char_at_range
3327 would overcomplicate the logic for the common test cases, so
3328 we detect it here. */
3329 if (should_have_column_data_p (input_locs[0])
3330 && !should_have_column_data_p (input_locs[4]))
3332 /* Verify that get_source_range_for_substring gracefully rejects
3333 this case. */
3334 source_range actual_range;
3335 const char *err
3336 = get_source_range_for_char (test.m_parser, &test.m_concats,
3337 initial_loc, type, 0, &actual_range);
3338 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3339 return;
3342 for (int i = 0; i < 5; i++)
3343 for (int j = 0; j < 2; j++)
3344 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3345 i + 1, 10 + j, 10 + j);
3347 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3348 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3350 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3353 /* Another test of string literal concatenation, this time combined with
3354 various kinds of escaped characters. */
3356 static void
3357 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3359 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3360 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3361 const char *content
3362 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3363 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3364 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3365 lexer_test test (case_, content, NULL);
3367 auto_vec <cpp_string> input_strings;
3368 location_t input_locs[4];
3370 /* Verify that we get the expected tokens back. */
3371 for (int i = 0; i < 4; i++)
3373 const cpp_token *tok = test.get_token ();
3374 ASSERT_EQ (tok->type, CPP_STRING);
3375 input_strings.safe_push (tok->val.str);
3376 input_locs[i] = tok->src_loc;
3379 /* Verify that cpp_interpret_string works. */
3380 cpp_string dst_string;
3381 const enum cpp_ttype type = CPP_STRING;
3382 bool result = cpp_interpret_string (test.m_parser,
3383 input_strings.address (), 4,
3384 &dst_string, type);
3385 ASSERT_TRUE (result);
3386 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3387 free (const_cast <unsigned char *> (dst_string.text));
3389 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3390 test.m_concats.record_string_concatenation (4, input_locs);
3392 location_t initial_loc = input_locs[0];
3394 for (int i = 0; i <= 4; i++)
3395 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3396 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3397 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3398 for (int i = 7; i <= 9; i++)
3399 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3401 /* NUL-terminator should use the location of the final closing quote. */
3402 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3404 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3407 /* Test of string literal in a macro. */
3409 static void
3410 test_lexer_string_locations_macro (const line_table_case &case_)
3412 /* Digits 0-9.
3413 .....................0000000001111111111.22222222223.
3414 .....................1234567890123456789.01234567890. */
3415 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3416 " MACRO");
3417 lexer_test test (case_, content, NULL);
3419 /* Verify that we get the expected tokens back. */
3420 const cpp_token *tok = test.get_token ();
3421 ASSERT_EQ (tok->type, CPP_PADDING);
3423 tok = test.get_token ();
3424 ASSERT_EQ (tok->type, CPP_STRING);
3425 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3427 /* Verify ranges of individual characters. We ought to
3428 see columns within the macro definition. */
3429 for (int i = 0; i <= 10; i++)
3430 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3431 i, 1, 20 + i, 20 + i);
3433 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3435 tok = test.get_token ();
3436 ASSERT_EQ (tok->type, CPP_PADDING);
3439 /* Test of stringification of a macro argument. */
3441 static void
3442 test_lexer_string_locations_stringified_macro_argument
3443 (const line_table_case &case_)
3445 /* .....................000000000111111111122222222223.
3446 .....................123456789012345678901234567890. */
3447 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3448 "MACRO(foo)\n");
3449 lexer_test test (case_, content, NULL);
3451 /* Verify that we get the expected token back. */
3452 const cpp_token *tok = test.get_token ();
3453 ASSERT_EQ (tok->type, CPP_PADDING);
3455 tok = test.get_token ();
3456 ASSERT_EQ (tok->type, CPP_STRING);
3457 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3459 /* We don't support getting the location of a stringified macro
3460 argument. Verify that it fails gracefully. */
3461 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3462 "cpp_interpret_string_1 failed");
3464 tok = test.get_token ();
3465 ASSERT_EQ (tok->type, CPP_PADDING);
3467 tok = test.get_token ();
3468 ASSERT_EQ (tok->type, CPP_PADDING);
3471 /* Ensure that we are fail gracefully if something attempts to pass
3472 in a location that isn't a string literal token. Seen on this code:
3474 const char a[] = " %d ";
3475 __builtin_printf (a, 0.5);
3478 when c-format.c erroneously used the indicated one-character
3479 location as the format string location, leading to a read past the
3480 end of a string buffer in cpp_interpret_string_1. */
3482 static void
3483 test_lexer_string_locations_non_string (const line_table_case &case_)
3485 /* .....................000000000111111111122222222223.
3486 .....................123456789012345678901234567890. */
3487 const char *content = (" a\n");
3488 lexer_test test (case_, content, NULL);
3490 /* Verify that we get the expected token back. */
3491 const cpp_token *tok = test.get_token ();
3492 ASSERT_EQ (tok->type, CPP_NAME);
3493 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3495 /* At this point, libcpp is attempting to interpret the name as a
3496 string literal, despite it not starting with a quote. We don't detect
3497 that, but we should at least fail gracefully. */
3498 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3499 "cpp_interpret_string_1 failed");
3502 /* Ensure that we can read substring information for a token which
3503 starts in one linemap and ends in another . Adapted from
3504 gcc.dg/cpp/pr69985.c. */
3506 static void
3507 test_lexer_string_locations_long_line (const line_table_case &case_)
3509 /* .....................000000.000111111111
3510 .....................123456.789012346789. */
3511 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3512 " \"0123456789012345678901234567890123456789"
3513 "0123456789012345678901234567890123456789"
3514 "0123456789012345678901234567890123456789"
3515 "0123456789\"\n");
3517 lexer_test test (case_, content, NULL);
3519 /* Verify that we get the expected token back. */
3520 const cpp_token *tok = test.get_token ();
3521 ASSERT_EQ (tok->type, CPP_STRING);
3523 if (!should_have_column_data_p (line_table->highest_location))
3524 return;
3526 /* Verify ranges of individual characters. */
3527 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3528 for (int i = 0; i < 131; i++)
3529 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3530 i, 2, 7 + i, 7 + i);
3533 /* Test of locations within a raw string that doesn't contain a newline. */
3535 static void
3536 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3538 /* .....................00.0000000111111111122.
3539 .....................12.3456789012345678901. */
3540 const char *content = ("R\"foo(0123456789)foo\"\n");
3541 lexer_test test (case_, content, NULL);
3543 /* Verify that we get the expected token back. */
3544 const cpp_token *tok = test.get_token ();
3545 ASSERT_EQ (tok->type, CPP_STRING);
3547 /* Verify that cpp_interpret_string works. */
3548 cpp_string dst_string;
3549 const enum cpp_ttype type = CPP_STRING;
3550 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3551 &dst_string, type);
3552 ASSERT_TRUE (result);
3553 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3554 free (const_cast <unsigned char *> (dst_string.text));
3556 if (!should_have_column_data_p (line_table->highest_location))
3557 return;
3559 /* 0-9, plus the nil terminator. */
3560 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3561 for (int i = 0; i < 11; i++)
3562 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3563 i, 1, 7 + i, 7 + i);
3566 /* Test of locations within a raw string that contains a newline. */
3568 static void
3569 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3571 /* .....................00.0000.
3572 .....................12.3456. */
3573 const char *content = ("R\"foo(\n"
3574 /* .....................00000.
3575 .....................12345. */
3576 "hello\n"
3577 "world\n"
3578 /* .....................00000.
3579 .....................12345. */
3580 ")foo\"\n");
3581 lexer_test test (case_, content, NULL);
3583 /* Verify that we get the expected token back. */
3584 const cpp_token *tok = test.get_token ();
3585 ASSERT_EQ (tok->type, CPP_STRING);
3587 /* Verify that cpp_interpret_string works. */
3588 cpp_string dst_string;
3589 const enum cpp_ttype type = CPP_STRING;
3590 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3591 &dst_string, type);
3592 ASSERT_TRUE (result);
3593 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3594 free (const_cast <unsigned char *> (dst_string.text));
3596 if (!should_have_column_data_p (line_table->highest_location))
3597 return;
3599 /* Currently we don't support locations within raw strings that
3600 contain newlines. */
3601 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3602 "range endpoints are on different lines");
3605 /* Test of parsing an unterminated raw string. */
3607 static void
3608 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3610 const char *content = "R\"ouch()ouCh\" /* etc */";
3612 lexer_diagnostic_sink diagnostics;
3613 lexer_test test (case_, content, &diagnostics);
3614 test.m_implicitly_expect_EOF = false;
3616 /* Attempt to parse the raw string. */
3617 const cpp_token *tok = test.get_token ();
3618 ASSERT_EQ (tok->type, CPP_EOF);
3620 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3621 /* We expect the message "unterminated raw string"
3622 in the "cpplib" translation domain.
3623 It's not clear that dgettext is available on all supported hosts,
3624 so this assertion is commented-out for now.
3625 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3626 diagnostics.m_diagnostics[0]);
3630 /* Test of lexing char constants. */
3632 static void
3633 test_lexer_char_constants (const line_table_case &case_)
3635 /* Various char constants.
3636 .....................0000000001111111111.22222222223.
3637 .....................1234567890123456789.01234567890. */
3638 const char *content = (" 'a'\n"
3639 " u'a'\n"
3640 " U'a'\n"
3641 " L'a'\n"
3642 " 'abc'\n");
3643 lexer_test test (case_, content, NULL);
3645 /* Verify that we get the expected tokens back. */
3646 /* 'a'. */
3647 const cpp_token *tok = test.get_token ();
3648 ASSERT_EQ (tok->type, CPP_CHAR);
3649 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3651 unsigned int chars_seen;
3652 int unsignedp;
3653 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3654 &chars_seen, &unsignedp);
3655 ASSERT_EQ (cc, 'a');
3656 ASSERT_EQ (chars_seen, 1);
3658 /* u'a'. */
3659 tok = test.get_token ();
3660 ASSERT_EQ (tok->type, CPP_CHAR16);
3661 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3663 /* U'a'. */
3664 tok = test.get_token ();
3665 ASSERT_EQ (tok->type, CPP_CHAR32);
3666 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3668 /* L'a'. */
3669 tok = test.get_token ();
3670 ASSERT_EQ (tok->type, CPP_WCHAR);
3671 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3673 /* 'abc' (c-char-sequence). */
3674 tok = test.get_token ();
3675 ASSERT_EQ (tok->type, CPP_CHAR);
3676 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3678 /* A table of interesting location_t values, giving one axis of our test
3679 matrix. */
3681 static const location_t boundary_locations[] = {
3682 /* Zero means "don't override the default values for a new line_table". */
3685 /* An arbitrary non-zero value that isn't close to one of
3686 the boundary values below. */
3687 0x10000,
3689 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3690 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3691 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3692 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3693 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3694 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3696 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3697 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3698 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3699 LINE_MAP_MAX_LOCATION_WITH_COLS,
3700 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3701 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3704 /* Run TESTCASE multiple times, once for each case in our test matrix. */
3706 void
3707 for_each_line_table_case (void (*testcase) (const line_table_case &))
3709 /* As noted above in the description of struct line_table_case,
3710 we want to explore a test matrix of interesting line_table
3711 situations, running various selftests for each case within the
3712 matrix. */
3714 /* Run all tests with:
3715 (a) line_table->default_range_bits == 0, and
3716 (b) line_table->default_range_bits == 5. */
3717 int num_cases_tested = 0;
3718 for (int default_range_bits = 0; default_range_bits <= 5;
3719 default_range_bits += 5)
3721 /* ...and use each of the "interesting" location values as
3722 the starting location within line_table. */
3723 const int num_boundary_locations
3724 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3725 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3727 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3729 testcase (c);
3731 num_cases_tested++;
3735 /* Verify that we fully covered the test matrix. */
3736 ASSERT_EQ (num_cases_tested, 2 * 12);
3739 /* Verify that when presented with a consecutive pair of locations with
3740 a very large line offset, we don't attempt to consolidate them into
3741 a single ordinary linemap where the line offsets within the line map
3742 would lead to overflow (PR lto/88147). */
3744 static void
3745 test_line_offset_overflow ()
3747 line_table_test ltt (line_table_case (5, 0));
3749 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3750 linemap_line_start (line_table, 1, 100);
3751 location_t loc_a = linemap_line_start (line_table, 2578, 255);
3752 assert_loceq ("foo.c", 2578, 0, loc_a);
3754 const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3755 ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3756 ASSERT_EQ (ordmap_a->m_range_bits, 5);
3758 location_t loc_b = linemap_line_start (line_table, 404198, 512);
3759 assert_loceq ("foo.c", 404198, 0, loc_b);
3761 /* We should have started a new linemap, rather than attempting to store
3762 a very large line offset. */
3763 const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3764 ASSERT_NE (ordmap_a, ordmap_b);
3767 void test_cpp_utf8 ()
3769 const int def_tabstop = 8;
3770 /* Verify that wcwidth of invalid UTF-8 or control bytes is 1. */
3772 int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, def_tabstop);
3773 ASSERT_EQ (8, w_bad);
3774 int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, def_tabstop);
3775 ASSERT_EQ (5, w_ctrl);
3778 /* Verify that wcwidth of valid UTF-8 is as expected. */
3780 const int w_pi = cpp_display_width ("\xcf\x80", 2, def_tabstop);
3781 ASSERT_EQ (1, w_pi);
3782 const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, def_tabstop);
3783 ASSERT_EQ (2, w_emoji);
3784 const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
3785 def_tabstop);
3786 ASSERT_EQ (1, w_umlaut_precomposed);
3787 const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
3788 def_tabstop);
3789 ASSERT_EQ (1, w_umlaut_combining);
3790 const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, def_tabstop);
3791 ASSERT_EQ (2, w_han);
3792 const int w_ascii = cpp_display_width ("GCC", 3, def_tabstop);
3793 ASSERT_EQ (3, w_ascii);
3794 const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
3795 "\x9f! \xe4\xb8\xba y\xcc\x88",
3796 24, def_tabstop);
3797 ASSERT_EQ (18, w_mixed);
3800 /* Verify that display width properly expands tabs. */
3802 const char *tstr = "\tabc\td";
3803 ASSERT_EQ (6, cpp_display_width (tstr, 6, 1));
3804 ASSERT_EQ (10, cpp_display_width (tstr, 6, 3));
3805 ASSERT_EQ (17, cpp_display_width (tstr, 6, 8));
3806 ASSERT_EQ (1, cpp_display_column_to_byte_column (tstr, 6, 7, 8));
3809 /* Verify that cpp_byte_column_to_display_column can go past the end,
3810 and similar edge cases. */
3812 const char *str
3813 /* Display columns.
3814 111111112345 */
3815 = "\xcf\x80 abc";
3816 /* 111122223456
3817 Byte columns. */
3819 ASSERT_EQ (5, cpp_display_width (str, 6, def_tabstop));
3820 ASSERT_EQ (105,
3821 cpp_byte_column_to_display_column (str, 6, 106, def_tabstop));
3822 ASSERT_EQ (10000,
3823 cpp_byte_column_to_display_column (NULL, 0, 10000, def_tabstop));
3824 ASSERT_EQ (0,
3825 cpp_byte_column_to_display_column (NULL, 10000, 0, def_tabstop));
3828 /* Verify that cpp_display_column_to_byte_column can go past the end,
3829 and similar edge cases, and check invertibility. */
3831 const char *str
3832 /* Display columns.
3833 000000000000000000000000000000000000011
3834 111111112222222234444444455555555678901 */
3835 = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
3836 /* 000000000000000000000000000000000111111
3837 111122223333444456666777788889999012345
3838 Byte columns. */
3839 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, def_tabstop));
3840 ASSERT_EQ (15,
3841 cpp_display_column_to_byte_column (str, 15, 11, def_tabstop));
3842 ASSERT_EQ (115,
3843 cpp_display_column_to_byte_column (str, 15, 111, def_tabstop));
3844 ASSERT_EQ (10000,
3845 cpp_display_column_to_byte_column (NULL, 0, 10000, def_tabstop));
3846 ASSERT_EQ (0,
3847 cpp_display_column_to_byte_column (NULL, 10000, 0, def_tabstop));
3849 /* Verify that we do not interrupt a UTF-8 sequence. */
3850 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, def_tabstop));
3852 for (int byte_col = 1; byte_col <= 15; ++byte_col)
3854 const int disp_col
3855 = cpp_byte_column_to_display_column (str, 15, byte_col, def_tabstop);
3856 const int byte_col2
3857 = cpp_display_column_to_byte_column (str, 15, disp_col, def_tabstop);
3859 /* If we ask for the display column in the middle of a UTF-8
3860 sequence, it will return the length of the partial sequence,
3861 matching the behavior of GCC before display column support.
3862 Otherwise check the round trip was successful. */
3863 if (byte_col < 4)
3864 ASSERT_EQ (byte_col, disp_col);
3865 else if (byte_col >= 6 && byte_col < 9)
3866 ASSERT_EQ (3 + (byte_col - 5), disp_col);
3867 else
3868 ASSERT_EQ (byte_col2, byte_col);
3874 /* Run all of the selftests within this file. */
3876 void
3877 input_c_tests ()
3879 test_linenum_comparisons ();
3880 test_should_have_column_data_p ();
3881 test_unknown_location ();
3882 test_builtins ();
3883 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3885 for_each_line_table_case (test_accessing_ordinary_linemaps);
3886 for_each_line_table_case (test_lexer);
3887 for_each_line_table_case (test_lexer_string_locations_simple);
3888 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3889 for_each_line_table_case (test_lexer_string_locations_hex);
3890 for_each_line_table_case (test_lexer_string_locations_oct);
3891 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3892 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3893 for_each_line_table_case (test_lexer_string_locations_ucn4);
3894 for_each_line_table_case (test_lexer_string_locations_ucn8);
3895 for_each_line_table_case (test_lexer_string_locations_wide_string);
3896 for_each_line_table_case (test_lexer_string_locations_string16);
3897 for_each_line_table_case (test_lexer_string_locations_string32);
3898 for_each_line_table_case (test_lexer_string_locations_u8);
3899 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3900 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3901 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3902 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3903 for_each_line_table_case (test_lexer_string_locations_macro);
3904 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3905 for_each_line_table_case (test_lexer_string_locations_non_string);
3906 for_each_line_table_case (test_lexer_string_locations_long_line);
3907 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3908 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3909 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3910 for_each_line_table_case (test_lexer_char_constants);
3912 test_reading_source_line ();
3914 test_line_offset_overflow ();
3916 test_cpp_utf8 ();
3919 } // namespace selftest
3921 #endif /* CHECKING_P */