Support TI mode and soft float on PA64
[official-gcc.git] / gcc / input.c
blob4650547c7c91bacd6bd63ba97a5975b107f70e20
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2021 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic.h"
25 #include "selftest.h"
26 #include "cpplib.h"
28 #ifndef HAVE_ICONV
29 #define HAVE_ICONV 0
30 #endif
/* Input charset configuration.  */

/* Default charset callback: report that no input file needs charset
   conversion by returning a null charset name for every path.  */

static const char *default_charset_callback (const char *)
{
  return nullptr;
}
38 void
39 file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
40 bool should_skip_bom)
42 in_context.ccb = (ccb ? ccb : default_charset_callback);
43 in_context.should_skip_bom = should_skip_bom;
46 /* This is a cache used by get_next_line to store the content of a
47 file to be searched for file lines. */
48 class file_cache_slot
50 public:
51 file_cache_slot ();
52 ~file_cache_slot ();
54 bool read_line_num (size_t line_num,
55 char ** line, ssize_t *line_len);
57 /* Accessors. */
58 const char *get_file_path () const { return m_file_path; }
59 unsigned get_use_count () const { return m_use_count; }
60 bool missing_trailing_newline_p () const
62 return m_missing_trailing_newline;
65 void inc_use_count () { m_use_count++; }
67 bool create (const file_cache::input_context &in_context,
68 const char *file_path, FILE *fp, unsigned highest_use_count);
69 void evict ();
71 private:
72 /* These are information used to store a line boundary. */
73 class line_info
75 public:
76 /* The line number. It starts from 1. */
77 size_t line_num;
79 /* The position (byte count) of the beginning of the line,
80 relative to the file data pointer. This starts at zero. */
81 size_t start_pos;
83 /* The position (byte count) of the last byte of the line. This
84 normally points to the '\n' character, or to one byte after the
85 last byte of the file, if the file doesn't contain a '\n'
86 character. */
87 size_t end_pos;
89 line_info (size_t l, size_t s, size_t e)
90 : line_num (l), start_pos (s), end_pos (e)
93 line_info ()
94 :line_num (0), start_pos (0), end_pos (0)
98 bool needs_read_p () const;
99 bool needs_grow_p () const;
100 void maybe_grow ();
101 bool read_data ();
102 bool maybe_read_data ();
103 bool get_next_line (char **line, ssize_t *line_len);
104 bool read_next_line (char ** line, ssize_t *line_len);
105 bool goto_next_line ();
107 static const size_t buffer_size = 4 * 1024;
108 static const size_t line_record_size = 100;
110 /* The number of time this file has been accessed. This is used
111 to designate which file cache to evict from the cache
112 array. */
113 unsigned m_use_count;
115 /* The file_path is the key for identifying a particular file in
116 the cache.
117 For libcpp-using code, the underlying buffer for this field is
118 owned by the corresponding _cpp_file within the cpp_reader. */
119 const char *m_file_path;
121 FILE *m_fp;
123 /* This points to the content of the file that we've read so
124 far. */
125 char *m_data;
127 /* The allocated buffer to be freed may start a little earlier than DATA,
128 e.g. if a UTF8 BOM was skipped at the beginning. */
129 int m_alloc_offset;
131 /* The size of the DATA array above.*/
132 size_t m_size;
134 /* The number of bytes read from the underlying file so far. This
135 must be less (or equal) than SIZE above. */
136 size_t m_nb_read;
138 /* The index of the beginning of the current line. */
139 size_t m_line_start_idx;
141 /* The number of the previous line read. This starts at 1. Zero
142 means we've read no line so far. */
143 size_t m_line_num;
145 /* This is the total number of lines of the current file. At the
146 moment, we try to get this information from the line map
147 subsystem. Note that this is just a hint. When using the C++
148 front-end, this hint is correct because the input file is then
149 completely tokenized before parsing starts; so the line map knows
150 the number of lines before compilation really starts. For e.g,
151 the C front-end, it can happen that we start emitting diagnostics
152 before the line map has seen the end of the file. */
153 size_t m_total_lines;
155 /* Could this file be missing a trailing newline on its final line?
156 Initially true (to cope with empty files), set to true/false
157 as each line is read. */
158 bool m_missing_trailing_newline;
160 /* This is a record of the beginning and end of the lines we've seen
161 while reading the file. This is useful to avoid walking the data
162 from the beginning when we are asked to read a line that is
163 before LINE_START_IDX above. Note that the maximum size of this
164 record is line_record_size, so that the memory consumption
165 doesn't explode. We thus scale total_lines down to
166 line_record_size. */
167 vec<line_info, va_heap> m_line_record;
169 void offset_buffer (int offset)
171 gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
172 : (size_t) offset <= m_size);
173 gcc_assert (m_data);
174 m_alloc_offset += offset;
175 m_data += offset;
176 m_size -= offset;
181 /* Current position in real source file. */
183 location_t input_location = UNKNOWN_LOCATION;
185 class line_maps *line_table;
187 /* A stashed copy of "line_table" for use by selftest::line_table_test.
188 This needs to be a global so that it can be a GC root, and thus
189 prevent the stashed copy from being garbage-collected if the GC runs
190 during a line_table_test. */
192 class line_maps *saved_line_table;
194 /* Expand the source location LOC into a human readable location. If
195 LOC resolves to a builtin location, the file name of the readable
196 location is set to the string "<built-in>". If EXPANSION_POINT_P is
197 TRUE and LOC is virtual, then it is resolved to the expansion
198 point of the involved macro. Otherwise, it is resolved to the
199 spelling location of the token.
201 When resolving to the spelling location of the token, if the
202 resulting location is for a built-in location (that is, it has no
203 associated line/column) in the context of a macro expansion, the
204 returned location is the first one (while unwinding the macro
205 location towards its expansion point) that is in real source
206 code.
208 ASPECT controls which part of the location to use. */
210 static expanded_location
211 expand_location_1 (location_t loc,
212 bool expansion_point_p,
213 enum location_aspect aspect)
215 expanded_location xloc;
216 const line_map_ordinary *map;
217 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
218 tree block = NULL;
220 if (IS_ADHOC_LOC (loc))
222 block = LOCATION_BLOCK (loc);
223 loc = LOCATION_LOCUS (loc);
226 memset (&xloc, 0, sizeof (xloc));
228 if (loc >= RESERVED_LOCATION_COUNT)
230 if (!expansion_point_p)
232 /* We want to resolve LOC to its spelling location.
234 But if that spelling location is a reserved location that
235 appears in the context of a macro expansion (like for a
236 location for a built-in token), let's consider the first
237 location (toward the expansion point) that is not reserved;
238 that is, the first location that is in real source code. */
239 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
240 loc, NULL);
241 lrk = LRK_SPELLING_LOCATION;
243 loc = linemap_resolve_location (line_table, loc, lrk, &map);
245 /* loc is now either in an ordinary map, or is a reserved location.
246 If it is a compound location, the caret is in a spelling location,
247 but the start/finish might still be a virtual location.
248 Depending of what the caller asked for, we may need to recurse
249 one level in order to resolve any virtual locations in the
250 end-points. */
251 switch (aspect)
253 default:
254 gcc_unreachable ();
255 /* Fall through. */
256 case LOCATION_ASPECT_CARET:
257 break;
258 case LOCATION_ASPECT_START:
260 location_t start = get_start (loc);
261 if (start != loc)
262 return expand_location_1 (start, expansion_point_p, aspect);
264 break;
265 case LOCATION_ASPECT_FINISH:
267 location_t finish = get_finish (loc);
268 if (finish != loc)
269 return expand_location_1 (finish, expansion_point_p, aspect);
271 break;
273 xloc = linemap_expand_location (line_table, map, loc);
276 xloc.data = block;
277 if (loc <= BUILTINS_LOCATION)
278 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
280 return xloc;
283 /* Initialize the set of cache used for files accessed by caret
284 diagnostic. */
286 static void
287 diagnostic_file_cache_init (void)
289 gcc_assert (global_dc);
290 if (global_dc->m_file_cache == NULL)
291 global_dc->m_file_cache = new file_cache ();
294 /* Free the resources used by the set of cache used for files accessed
295 by caret diagnostic. */
297 void
298 diagnostic_file_cache_fini (void)
300 if (global_dc->m_file_cache)
302 delete global_dc->m_file_cache;
303 global_dc->m_file_cache = NULL;
307 /* Return the total lines number that have been read so far by the
308 line map (in the preprocessor) so far. For languages like C++ that
309 entirely preprocess the input file before starting to parse, this
310 equals the actual number of lines of the file. */
312 static size_t
313 total_lines_num (const char *file_path)
315 size_t r = 0;
316 location_t l = 0;
317 if (linemap_get_file_highest_location (line_table, file_path, &l))
319 gcc_assert (l >= RESERVED_LOCATION_COUNT);
320 expanded_location xloc = expand_location (l);
321 r = xloc.line;
323 return r;
326 /* Lookup the cache used for the content of a given file accessed by
327 caret diagnostic. Return the found cached file, or NULL if no
328 cached file was found. */
330 file_cache_slot *
331 file_cache::lookup_file (const char *file_path)
333 gcc_assert (file_path);
335 /* This will contain the found cached file. */
336 file_cache_slot *r = NULL;
337 for (unsigned i = 0; i < num_file_slots; ++i)
339 file_cache_slot *c = &m_file_slots[i];
340 if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
342 c->inc_use_count ();
343 r = c;
347 if (r)
348 r->inc_use_count ();
350 return r;
353 /* Purge any mention of FILENAME from the cache of files used for
354 printing source code. For use in selftests when working
355 with tempfiles. */
357 void
358 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
360 gcc_assert (file_path);
362 if (!global_dc->m_file_cache)
363 return;
365 global_dc->m_file_cache->forcibly_evict_file (file_path);
368 void
369 file_cache::forcibly_evict_file (const char *file_path)
371 gcc_assert (file_path);
373 file_cache_slot *r = lookup_file (file_path);
374 if (!r)
375 /* Not found. */
376 return;
378 r->evict ();
381 void
382 file_cache_slot::evict ()
384 m_file_path = NULL;
385 if (m_fp)
386 fclose (m_fp);
387 m_fp = NULL;
388 m_nb_read = 0;
389 m_line_start_idx = 0;
390 m_line_num = 0;
391 m_line_record.truncate (0);
392 m_use_count = 0;
393 m_total_lines = 0;
394 m_missing_trailing_newline = true;
397 /* Return the file cache that has been less used, recently, or the
398 first empty one. If HIGHEST_USE_COUNT is non-null,
399 *HIGHEST_USE_COUNT is set to the highest use count of the entries
400 in the cache table. */
402 file_cache_slot*
403 file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
405 diagnostic_file_cache_init ();
407 file_cache_slot *to_evict = &m_file_slots[0];
408 unsigned huc = to_evict->get_use_count ();
409 for (unsigned i = 1; i < num_file_slots; ++i)
411 file_cache_slot *c = &m_file_slots[i];
412 bool c_is_empty = (c->get_file_path () == NULL);
414 if (c->get_use_count () < to_evict->get_use_count ()
415 || (to_evict->get_file_path () && c_is_empty))
416 /* We evict C because it's either an entry with a lower use
417 count or one that is empty. */
418 to_evict = c;
420 if (huc < c->get_use_count ())
421 huc = c->get_use_count ();
423 if (c_is_empty)
424 /* We've reached the end of the cache; subsequent elements are
425 all empty. */
426 break;
429 if (highest_use_count)
430 *highest_use_count = huc;
432 return to_evict;
435 /* Create the cache used for the content of a given file to be
436 accessed by caret diagnostic. This cache is added to an array of
437 cache and can be retrieved by lookup_file_in_cache_tab. This
438 function returns the created cache. Note that only the last
439 num_file_slots files are cached. */
441 file_cache_slot*
442 file_cache::add_file (const char *file_path)
445 FILE *fp = fopen (file_path, "r");
446 if (fp == NULL)
447 return NULL;
449 unsigned highest_use_count = 0;
450 file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
451 if (!r->create (in_context, file_path, fp, highest_use_count))
452 return NULL;
453 return r;
456 /* Populate this slot for use on FILE_PATH and FP, dropping any
457 existing cached content within it. */
459 bool
460 file_cache_slot::create (const file_cache::input_context &in_context,
461 const char *file_path, FILE *fp,
462 unsigned highest_use_count)
464 m_file_path = file_path;
465 if (m_fp)
466 fclose (m_fp);
467 m_fp = fp;
468 if (m_alloc_offset)
469 offset_buffer (-m_alloc_offset);
470 m_nb_read = 0;
471 m_line_start_idx = 0;
472 m_line_num = 0;
473 m_line_record.truncate (0);
474 /* Ensure that this cache entry doesn't get evicted next time
475 add_file_to_cache_tab is called. */
476 m_use_count = ++highest_use_count;
477 m_total_lines = total_lines_num (file_path);
478 m_missing_trailing_newline = true;
481 /* Check the input configuration to determine if we need to do any
482 transformations, such as charset conversion or BOM skipping. */
483 if (const char *input_charset = in_context.ccb (file_path))
485 /* Need a full-blown conversion of the input charset. */
486 fclose (m_fp);
487 m_fp = NULL;
488 const cpp_converted_source cs
489 = cpp_get_converted_source (file_path, input_charset);
490 if (!cs.data)
491 return false;
492 if (m_data)
493 XDELETEVEC (m_data);
494 m_data = cs.data;
495 m_nb_read = m_size = cs.len;
496 m_alloc_offset = cs.data - cs.to_free;
498 else if (in_context.should_skip_bom)
500 if (read_data ())
502 const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
503 offset_buffer (offset);
504 m_nb_read -= offset;
508 return true;
511 /* file_cache's ctor. */
513 file_cache::file_cache ()
514 : m_file_slots (new file_cache_slot[num_file_slots])
516 initialize_input_context (nullptr, false);
519 /* file_cache's dtor. */
521 file_cache::~file_cache ()
523 delete[] m_file_slots;
526 /* Lookup the cache used for the content of a given file accessed by
527 caret diagnostic. If no cached file was found, create a new cache
528 for this file, add it to the array of cached file and return
529 it. */
531 file_cache_slot*
532 file_cache::lookup_or_add_file (const char *file_path)
534 file_cache_slot *r = lookup_file (file_path);
535 if (r == NULL)
536 r = add_file (file_path);
537 return r;
540 /* Default constructor for a cache of file used by caret
541 diagnostic. */
543 file_cache_slot::file_cache_slot ()
544 : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
545 m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
546 m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true)
548 m_line_record.create (0);
551 /* Destructor for a cache of file used by caret diagnostic. */
553 file_cache_slot::~file_cache_slot ()
555 if (m_fp)
557 fclose (m_fp);
558 m_fp = NULL;
560 if (m_data)
562 offset_buffer (-m_alloc_offset);
563 XDELETEVEC (m_data);
564 m_data = 0;
566 m_line_record.release ();
569 /* Returns TRUE iff the cache would need to be filled with data coming
570 from the file. That is, either the cache is empty or full or the
571 current line is empty. Note that if the cache is full, it would
572 need to be extended and filled again. */
574 bool
575 file_cache_slot::needs_read_p () const
577 return m_fp && (m_nb_read == 0
578 || m_nb_read == m_size
579 || (m_line_start_idx >= m_nb_read - 1));
582 /* Return TRUE iff the cache is full and thus needs to be
583 extended. */
585 bool
586 file_cache_slot::needs_grow_p () const
588 return m_nb_read == m_size;
591 /* Grow the cache if it needs to be extended. */
593 void
594 file_cache_slot::maybe_grow ()
596 if (!needs_grow_p ())
597 return;
599 if (!m_data)
601 gcc_assert (m_size == 0 && m_alloc_offset == 0);
602 m_size = buffer_size;
603 m_data = XNEWVEC (char, m_size);
605 else
607 const int offset = m_alloc_offset;
608 offset_buffer (-offset);
609 m_size *= 2;
610 m_data = XRESIZEVEC (char, m_data, m_size);
611 offset_buffer (offset);
615 /* Read more data into the cache. Extends the cache if need be.
616 Returns TRUE iff new data could be read. */
618 bool
619 file_cache_slot::read_data ()
621 if (feof (m_fp) || ferror (m_fp))
622 return false;
624 maybe_grow ();
626 char * from = m_data + m_nb_read;
627 size_t to_read = m_size - m_nb_read;
628 size_t nb_read = fread (from, 1, to_read, m_fp);
630 if (ferror (m_fp))
631 return false;
633 m_nb_read += nb_read;
634 return !!nb_read;
637 /* Read new data iff the cache needs to be filled with more data
638 coming from the file FP. Return TRUE iff the cache was filled with
639 mode data. */
641 bool
642 file_cache_slot::maybe_read_data ()
644 if (!needs_read_p ())
645 return false;
646 return read_data ();
649 /* Read a new line from file FP, using C as a cache for the data
650 coming from the file. Upon successful completion, *LINE is set to
651 the beginning of the line found. *LINE points directly in the
652 line cache and is only valid until the next call of get_next_line.
653 *LINE_LEN is set to the length of the line. Note that the line
654 does not contain any terminal delimiter. This function returns
655 true if some data was read or process from the cache, false
656 otherwise. Note that subsequent calls to get_next_line might
657 make the content of *LINE invalid. */
659 bool
660 file_cache_slot::get_next_line (char **line, ssize_t *line_len)
662 /* Fill the cache with data to process. */
663 maybe_read_data ();
665 size_t remaining_size = m_nb_read - m_line_start_idx;
666 if (remaining_size == 0)
667 /* There is no more data to process. */
668 return false;
670 char *line_start = m_data + m_line_start_idx;
672 char *next_line_start = NULL;
673 size_t len = 0;
674 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
675 if (line_end == NULL)
677 /* We haven't found the end-of-line delimiter in the cache.
678 Fill the cache with more data from the file and look for the
679 '\n'. */
680 while (maybe_read_data ())
682 line_start = m_data + m_line_start_idx;
683 remaining_size = m_nb_read - m_line_start_idx;
684 line_end = (char *) memchr (line_start, '\n', remaining_size);
685 if (line_end != NULL)
687 next_line_start = line_end + 1;
688 break;
691 if (line_end == NULL)
693 /* We've loadded all the file into the cache and still no
694 '\n'. Let's say the line ends up at one byte passed the
695 end of the file. This is to stay consistent with the case
696 of when the line ends up with a '\n' and line_end points to
697 that terminal '\n'. That consistency is useful below in
698 the len calculation. */
699 line_end = m_data + m_nb_read ;
700 m_missing_trailing_newline = true;
702 else
703 m_missing_trailing_newline = false;
705 else
707 next_line_start = line_end + 1;
708 m_missing_trailing_newline = false;
711 if (m_fp && ferror (m_fp))
712 return false;
714 /* At this point, we've found the end of the of line. It either
715 points to the '\n' or to one byte after the last byte of the
716 file. */
717 gcc_assert (line_end != NULL);
719 len = line_end - line_start;
721 if (m_line_start_idx < m_nb_read)
722 *line = line_start;
724 ++m_line_num;
726 /* Before we update our line record, make sure the hint about the
727 total number of lines of the file is correct. If it's not, then
728 we give up recording line boundaries from now on. */
729 bool update_line_record = true;
730 if (m_line_num > m_total_lines)
731 update_line_record = false;
733 /* Now update our line record so that re-reading lines from the
734 before m_line_start_idx is faster. */
735 if (update_line_record
736 && m_line_record.length () < line_record_size)
738 /* If the file lines fits in the line record, we just record all
739 its lines ...*/
740 if (m_total_lines <= line_record_size
741 && m_line_num > m_line_record.length ())
742 m_line_record.safe_push
743 (file_cache_slot::line_info (m_line_num,
744 m_line_start_idx,
745 line_end - m_data));
746 else if (m_total_lines > line_record_size)
748 /* ... otherwise, we just scale total_lines down to
749 (line_record_size lines. */
750 size_t n = (m_line_num * line_record_size) / m_total_lines;
751 if (m_line_record.length () == 0
752 || n >= m_line_record.length ())
753 m_line_record.safe_push
754 (file_cache_slot::line_info (m_line_num,
755 m_line_start_idx,
756 line_end - m_data));
760 /* Update m_line_start_idx so that it points to the next line to be
761 read. */
762 if (next_line_start)
763 m_line_start_idx = next_line_start - m_data;
764 else
765 /* We didn't find any terminal '\n'. Let's consider that the end
766 of line is the end of the data in the cache. The next
767 invocation of get_next_line will either read more data from the
768 underlying file or return false early because we've reached the
769 end of the file. */
770 m_line_start_idx = m_nb_read;
772 *line_len = len;
774 return true;
777 /* Consume the next bytes coming from the cache (or from its
778 underlying file if there are remaining unread bytes in the file)
779 until we reach the next end-of-line (or end-of-file). There is no
780 copying from the cache involved. Return TRUE upon successful
781 completion. */
783 bool
784 file_cache_slot::goto_next_line ()
786 char *l;
787 ssize_t len;
789 return get_next_line (&l, &len);
792 /* Read an arbitrary line number LINE_NUM from the file cached in C.
793 If the line was read successfully, *LINE points to the beginning
794 of the line in the file cache and *LINE_LEN is the length of the
795 line. *LINE is not nul-terminated, but may contain zero bytes.
796 *LINE is only valid until the next call of read_line_num.
797 This function returns bool if a line was read. */
799 bool
800 file_cache_slot::read_line_num (size_t line_num,
801 char ** line, ssize_t *line_len)
803 gcc_assert (line_num > 0);
805 if (line_num <= m_line_num)
807 /* We've been asked to read lines that are before m_line_num.
808 So lets use our line record (if it's not empty) to try to
809 avoid re-reading the file from the beginning again. */
811 if (m_line_record.is_empty ())
813 m_line_start_idx = 0;
814 m_line_num = 0;
816 else
818 file_cache_slot::line_info *i = NULL;
819 if (m_total_lines <= line_record_size)
821 /* In languages where the input file is not totally
822 preprocessed up front, the m_total_lines hint
823 can be smaller than the number of lines of the
824 file. In that case, only the first
825 m_total_lines have been recorded.
827 Otherwise, the first m_total_lines we've read have
828 their start/end recorded here. */
829 i = (line_num <= m_total_lines)
830 ? &m_line_record[line_num - 1]
831 : &m_line_record[m_total_lines - 1];
832 gcc_assert (i->line_num <= line_num);
834 else
836 /* So the file had more lines than our line record
837 size. Thus the number of lines we've recorded has
838 been scaled down to line_record_size. Let's
839 pick the start/end of the recorded line that is
840 closest to line_num. */
841 size_t n = (line_num <= m_total_lines)
842 ? line_num * line_record_size / m_total_lines
843 : m_line_record.length () - 1;
844 if (n < m_line_record.length ())
846 i = &m_line_record[n];
847 gcc_assert (i->line_num <= line_num);
851 if (i && i->line_num == line_num)
853 /* We have the start/end of the line. */
854 *line = m_data + i->start_pos;
855 *line_len = i->end_pos - i->start_pos;
856 return true;
859 if (i)
861 m_line_start_idx = i->start_pos;
862 m_line_num = i->line_num - 1;
864 else
866 m_line_start_idx = 0;
867 m_line_num = 0;
872 /* Let's walk from line m_line_num up to line_num - 1, without
873 copying any line. */
874 while (m_line_num < line_num - 1)
875 if (!goto_next_line ())
876 return false;
878 /* The line we want is the next one. Let's read and copy it back to
879 the caller. */
880 return get_next_line (line, line_len);
883 /* Return the physical source line that corresponds to FILE_PATH/LINE.
884 The line is not nul-terminated. The returned pointer is only
885 valid until the next call of location_get_source_line.
886 Note that the line can contain several null characters,
887 so the returned value's length has the actual length of the line.
888 If the function fails, a NULL char_span is returned. */
890 char_span
891 location_get_source_line (const char *file_path, int line)
893 char *buffer = NULL;
894 ssize_t len;
896 if (line == 0)
897 return char_span (NULL, 0);
899 if (file_path == NULL)
900 return char_span (NULL, 0);
902 diagnostic_file_cache_init ();
904 file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
905 if (c == NULL)
906 return char_span (NULL, 0);
908 bool read = c->read_line_num (line, &buffer, &len);
909 if (!read)
910 return char_span (NULL, 0);
912 return char_span (buffer, len);
915 /* Determine if FILE_PATH missing a trailing newline on its final line.
916 Only valid to call once all of the file has been loaded, by
917 requesting a line number beyond the end of the file. */
919 bool
920 location_missing_trailing_newline (const char *file_path)
922 diagnostic_file_cache_init ();
924 file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
925 if (c == NULL)
926 return false;
928 return c->missing_trailing_newline_p ();
931 /* Test if the location originates from the spelling location of a
932 builtin-tokens. That is, return TRUE if LOC is a (possibly
933 virtual) location of a built-in token that appears in the expansion
934 list of a macro. Please note that this function also works on
935 tokens that result from built-in tokens. For instance, the
936 function would return true if passed a token "4" that is the result
937 of the expansion of the built-in __LINE__ macro. */
938 bool
939 is_location_from_builtin_token (location_t loc)
941 const line_map_ordinary *map = NULL;
942 loc = linemap_resolve_location (line_table, loc,
943 LRK_SPELLING_LOCATION, &map);
944 return loc == BUILTINS_LOCATION;
947 /* Expand the source location LOC into a human readable location. If
948 LOC is virtual, it resolves to the expansion point of the involved
949 macro. If LOC resolves to a builtin location, the file name of the
950 readable location is set to the string "<built-in>". */
952 expanded_location
953 expand_location (location_t loc)
955 return expand_location_1 (loc, /*expansion_point_p=*/true,
956 LOCATION_ASPECT_CARET);
959 /* Expand the source location LOC into a human readable location. If
960 LOC is virtual, it resolves to the expansion location of the
961 relevant macro. If LOC resolves to a builtin location, the file
962 name of the readable location is set to the string
963 "<built-in>". */
965 expanded_location
966 expand_location_to_spelling_point (location_t loc,
967 enum location_aspect aspect)
969 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
972 /* The rich_location class within libcpp requires a way to expand
973 location_t instances, and relies on the client code
974 providing a symbol named
975 linemap_client_expand_location_to_spelling_point
976 to do this.
978 This is the implementation for libcommon.a (all host binaries),
979 which simply calls into expand_location_1. */
981 expanded_location
982 linemap_client_expand_location_to_spelling_point (location_t loc,
983 enum location_aspect aspect)
985 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
989 /* If LOCATION is in a system header and if it is a virtual location for
990 a token coming from the expansion of a macro, unwind it to the
991 location of the expansion point of the macro. Otherwise, just return
992 LOCATION.
994 This is used for instance when we want to emit diagnostics about a
995 token that may be located in a macro that is itself defined in a
996 system header, for example, for the NULL macro. In such a case, if
997 LOCATION were passed directly to diagnostic functions such as
998 warning_at, the diagnostic would be suppressed (unless
999 -Wsystem-headers). */
1001 location_t
1002 expansion_point_location_if_in_system_header (location_t location)
1004 if (in_system_header_at (location))
1005 location = linemap_resolve_location (line_table, location,
1006 LRK_MACRO_EXPANSION_POINT,
1007 NULL);
1008 return location;
1011 /* If LOCATION is a virtual location for a token coming from the expansion
1012 of a macro, unwind to the location of the expansion point of the macro. */
1014 location_t
1015 expansion_point_location (location_t location)
1017 return linemap_resolve_location (line_table, location,
1018 LRK_MACRO_EXPANSION_POINT, NULL);
1021 /* Construct a location with caret at CARET, ranging from START to
1022 finish e.g.
1024 11111111112
1025 12345678901234567890
1027 523 return foo + bar;
1028 ~~~~^~~~~
1031 The location's caret is at the "+", line 523 column 15, but starts
1032 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
1033 of "bar" at column 19. */
1035 location_t
1036 make_location (location_t caret, location_t start, location_t finish)
1038 location_t pure_loc = get_pure_location (caret);
1039 source_range src_range;
1040 src_range.m_start = get_start (start);
1041 src_range.m_finish = get_finish (finish);
1042 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
1043 pure_loc,
1044 src_range,
1045 NULL);
1046 return combined_loc;
1049 /* Same as above, but taking a source range rather than two locations. */
1051 location_t
1052 make_location (location_t caret, source_range src_range)
1054 location_t pure_loc = get_pure_location (caret);
1055 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
1058 /* An expanded_location stores the column in byte units. This function
1059 converts that column to display units. That requires reading the associated
1060 source line in order to calculate the display width. If that cannot be done
1061 for any reason, then returns the byte column as a fallback. */
1063 location_compute_display_column (expanded_location exploc,
1064 const cpp_char_column_policy &policy)
1066 if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
1067 return exploc.column;
1068 char_span line = location_get_source_line (exploc.file, exploc.line);
1069 /* If line is NULL, this function returns exploc.column which is the
1070 desired fallback. */
1071 return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
1072 exploc.column, policy);
1075 /* Dump statistics to stderr about the memory usage of the line_table
1076 set of line maps. This also displays some statistics about macro
1077 expansion. */
1079 void
1080 dump_line_table_statistics (void)
1082 struct linemap_stats s;
1083 long total_used_map_size,
1084 macro_maps_size,
1085 total_allocated_map_size;
1087 memset (&s, 0, sizeof (s));
1089 linemap_get_statistics (line_table, &s);
1091 macro_maps_size = s.macro_maps_used_size
1092 + s.macro_maps_locations_size;
1094 total_allocated_map_size = s.ordinary_maps_allocated_size
1095 + s.macro_maps_allocated_size
1096 + s.macro_maps_locations_size;
1098 total_used_map_size = s.ordinary_maps_used_size
1099 + s.macro_maps_used_size
1100 + s.macro_maps_locations_size;
1102 fprintf (stderr, "Number of expanded macros: %5ld\n",
1103 s.num_expanded_macros);
1104 if (s.num_expanded_macros != 0)
1105 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
1106 s.num_macro_tokens / s.num_expanded_macros);
1107 fprintf (stderr,
1108 "\nLine Table allocations during the "
1109 "compilation process\n");
1110 fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
1111 SIZE_AMOUNT (s.num_ordinary_maps_used));
1112 fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
1113 SIZE_AMOUNT (s.ordinary_maps_used_size));
1114 fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
1115 SIZE_AMOUNT (s.num_ordinary_maps_allocated));
1116 fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
1117 SIZE_AMOUNT (s.ordinary_maps_allocated_size));
1118 fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
1119 SIZE_AMOUNT (s.num_macro_maps_used));
1120 fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
1121 SIZE_AMOUNT (s.macro_maps_used_size));
1122 fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
1123 SIZE_AMOUNT (s.macro_maps_locations_size));
1124 fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
1125 SIZE_AMOUNT (macro_maps_size));
1126 fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
1127 SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
1128 fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
1129 SIZE_AMOUNT (total_allocated_map_size));
1130 fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
1131 SIZE_AMOUNT (total_used_map_size));
1132 fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
1133 SIZE_AMOUNT (s.adhoc_table_size));
1134 fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
1135 SIZE_AMOUNT (s.adhoc_table_entries_used));
1136 fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
1137 SIZE_AMOUNT (line_table->num_optimized_ranges));
1138 fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
1139 SIZE_AMOUNT (line_table->num_unoptimized_ranges));
1141 fprintf (stderr, "\n");
1144 /* Get location one beyond the final location in ordinary map IDX. */
1146 static location_t
1147 get_end_location (class line_maps *set, unsigned int idx)
1149 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1150 return set->highest_location;
1152 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1153 return MAP_START_LOCATION (next_map);
/* Helper function for write_digit_row.
   Write the decimal units digit of DIGIT to STREAM as one character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1164 /* Helper function for dump_location_info.
1165 Write a row of numbers to STREAM, numbering a source line,
1166 giving the units, tens, hundreds etc of the column number. */
1168 static void
1169 write_digit_row (FILE *stream, int indent,
1170 const line_map_ordinary *map,
1171 location_t loc, int max_col, int divisor)
1173 fprintf (stream, "%*c", indent, ' ');
1174 fprintf (stream, "|");
1175 for (int column = 1; column < max_col; column++)
1177 location_t column_loc = loc + (column << map->m_range_bits);
1178 write_digit (stream, column_loc / divisor);
1180 fprintf (stream, "\n");
1183 /* Write a half-closed (START) / half-open (END) interval of
1184 location_t to STREAM. */
1186 static void
1187 dump_location_range (FILE *stream,
1188 location_t start, location_t end)
1190 fprintf (stream,
1191 " location_t interval: %u <= loc < %u\n",
1192 start, end);
1195 /* Write a labelled description of a half-closed (START) / half-open (END)
1196 interval of location_t to STREAM. */
1198 static void
1199 dump_labelled_location_range (FILE *stream,
1200 const char *name,
1201 location_t start, location_t end)
1203 fprintf (stream, "%s\n", name);
1204 dump_location_range (stream, start, end);
1205 fprintf (stream, "\n");
1208 /* Write a visualization of the locations in the line_table to STREAM. */
1210 void
1211 dump_location_info (FILE *stream)
1213 /* Visualize the reserved locations. */
1214 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1215 0, RESERVED_LOCATION_COUNT);
1217 /* Visualize the ordinary line_map instances, rendering the sources. */
1218 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1220 location_t end_location = get_end_location (line_table, idx);
1221 /* half-closed: doesn't include this one. */
1223 const line_map_ordinary *map
1224 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1225 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1226 dump_location_range (stream,
1227 MAP_START_LOCATION (map), end_location);
1228 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1229 fprintf (stream, " starting at line: %i\n",
1230 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1231 fprintf (stream, " column and range bits: %i\n",
1232 map->m_column_and_range_bits);
1233 fprintf (stream, " column bits: %i\n",
1234 map->m_column_and_range_bits - map->m_range_bits);
1235 fprintf (stream, " range bits: %i\n",
1236 map->m_range_bits);
1237 const char * reason;
1238 switch (map->reason) {
1239 case LC_ENTER:
1240 reason = "LC_ENTER";
1241 break;
1242 case LC_LEAVE:
1243 reason = "LC_LEAVE";
1244 break;
1245 case LC_RENAME:
1246 reason = "LC_RENAME";
1247 break;
1248 case LC_RENAME_VERBATIM:
1249 reason = "LC_RENAME_VERBATIM";
1250 break;
1251 case LC_ENTER_MACRO:
1252 reason = "LC_RENAME_MACRO";
1253 break;
1254 default:
1255 reason = "Unknown";
1257 fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
1259 const line_map_ordinary *includer_map
1260 = linemap_included_from_linemap (line_table, map);
1261 fprintf (stream, " included from location: %d",
1262 linemap_included_from (map));
1263 if (includer_map) {
1264 fprintf (stream, " (in ordinary map %d)",
1265 int (includer_map - line_table->info_ordinary.maps));
1267 fprintf (stream, "\n");
1269 /* Render the span of source lines that this "map" covers. */
1270 for (location_t loc = MAP_START_LOCATION (map);
1271 loc < end_location;
1272 loc += (1 << map->m_range_bits) )
1274 gcc_assert (pure_location_p (line_table, loc) );
1276 expanded_location exploc
1277 = linemap_expand_location (line_table, map, loc);
1279 if (exploc.column == 0)
1281 /* Beginning of a new source line: draw the line. */
1283 char_span line_text = location_get_source_line (exploc.file,
1284 exploc.line);
1285 if (!line_text)
1286 break;
1287 fprintf (stream,
1288 "%s:%3i|loc:%5i|%.*s\n",
1289 exploc.file, exploc.line,
1290 loc,
1291 (int)line_text.length (), line_text.get_buffer ());
1293 /* "loc" is at column 0, which means "the whole line".
1294 Render the locations *within* the line, by underlining
1295 it, showing the location_t numeric values
1296 at each column. */
1297 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1298 if (max_col > line_text.length ())
1299 max_col = line_text.length () + 1;
1301 int len_lnum = num_digits (exploc.line);
1302 if (len_lnum < 3)
1303 len_lnum = 3;
1304 int len_loc = num_digits (loc);
1305 if (len_loc < 5)
1306 len_loc = 5;
1308 int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1310 /* Thousands. */
1311 if (end_location > 999)
1312 write_digit_row (stream, indent, map, loc, max_col, 1000);
1314 /* Hundreds. */
1315 if (end_location > 99)
1316 write_digit_row (stream, indent, map, loc, max_col, 100);
1318 /* Tens. */
1319 write_digit_row (stream, indent, map, loc, max_col, 10);
1321 /* Units. */
1322 write_digit_row (stream, indent, map, loc, max_col, 1);
1325 fprintf (stream, "\n");
1328 /* Visualize unallocated values. */
1329 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1330 line_table->highest_location,
1331 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1333 /* Visualize the macro line_map instances, rendering the sources. */
1334 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1336 /* Each macro map that is allocated owns location_t values
1337 that are *lower* that the one before them.
1338 Hence it's meaningful to view them either in order of ascending
1339 source locations, or in order of ascending macro map index. */
1340 const bool ascending_location_ts = true;
1341 unsigned int idx = (ascending_location_ts
1342 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1343 : i);
1344 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1345 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1346 idx,
1347 linemap_map_get_macro_name (map),
1348 MACRO_MAP_NUM_MACRO_TOKENS (map));
1349 dump_location_range (stream,
1350 map->start_location,
1351 (map->start_location
1352 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1353 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1354 "expansion point is location %i",
1355 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1356 fprintf (stream, " map->start_location: %u\n",
1357 map->start_location);
1359 fprintf (stream, " macro_locations:\n");
1360 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1362 location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1363 location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1365 /* linemap_add_macro_token encodes token numbers in an expansion
1366 by putting them after MAP_START_LOCATION. */
1368 /* I'm typically seeing 4 uninitialized entries at the end of
1369 0xafafafaf.
1370 This appears to be due to macro.c:replace_args
1371 adding 2 extra args for padding tokens; presumably there may
1372 be a leading and/or trailing padding token injected,
1373 each for 2 more location slots.
1374 This would explain there being up to 4 location_ts slots
1375 that may be uninitialized. */
1377 fprintf (stream, " %u: %u, %u\n",
1381 if (x == y)
1383 if (x < MAP_START_LOCATION (map))
1384 inform (x, "token %u has %<x-location == y-location == %u%>",
1385 i, x);
1386 else
1387 fprintf (stream,
1388 "x-location == y-location == %u encodes token # %u\n",
1389 x, x - MAP_START_LOCATION (map));
1391 else
1393 inform (x, "token %u has %<x-location == %u%>", i, x);
1394 inform (x, "token %u has %<y-location == %u%>", i, y);
1397 fprintf (stream, "\n");
1400 /* It appears that MAX_LOCATION_T itself is never assigned to a
1401 macro map, presumably due to an off-by-one error somewhere
1402 between the logic in linemap_enter_macro and
1403 LINEMAPS_MACRO_LOWEST_LOCATION. */
1404 dump_labelled_location_range (stream, "MAX_LOCATION_T",
1405 MAX_LOCATION_T,
1406 MAX_LOCATION_T + 1);
1408 /* Visualize ad-hoc values. */
1409 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1410 MAX_LOCATION_T + 1, UINT_MAX);
1413 /* string_concat's constructor. */
1415 string_concat::string_concat (int num, location_t *locs)
1416 : m_num (num)
1418 m_locs = ggc_vec_alloc <location_t> (num);
1419 for (int i = 0; i < num; i++)
1420 m_locs[i] = locs[i];
1423 /* string_concat_db's constructor. */
1425 string_concat_db::string_concat_db ()
1427 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1430 /* Record that a string concatenation occurred, covering NUM
1431 string literal tokens. LOCS is an array of size NUM, containing the
1432 locations of the tokens. A copy of LOCS is taken. */
1434 void
1435 string_concat_db::record_string_concatenation (int num, location_t *locs)
1437 gcc_assert (num > 1);
1438 gcc_assert (locs);
1440 location_t key_loc = get_key_loc (locs[0]);
1441 /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:
1442 any data now recorded under key 'key_loc' would be overwritten by a
1443 subsequent call with the same key 'key_loc'. */
1444 if (RESERVED_LOCATION_P (key_loc))
1445 return;
1447 string_concat *concat
1448 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1449 m_table->put (key_loc, concat);
1452 /* Determine if LOC was the location of the initial token of a
1453 concatenation of string literal tokens.
1454 If so, *OUT_NUM is written to with the number of tokens, and
1455 *OUT_LOCS with the location of an array of locations of the
1456 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1457 storage owned by the string_concat_db.
1458 Otherwise, return false. */
1460 bool
1461 string_concat_db::get_string_concatenation (location_t loc,
1462 int *out_num,
1463 location_t **out_locs)
1465 gcc_assert (out_num);
1466 gcc_assert (out_locs);
1468 location_t key_loc = get_key_loc (loc);
1469 /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see
1470 discussion in 'string_concat_db::record_string_concatenation'. */
1471 if (RESERVED_LOCATION_P (key_loc))
1472 return false;
1474 string_concat **concat = m_table->get (key_loc);
1475 if (!concat)
1476 return false;
1478 *out_num = (*concat)->m_num;
1479 *out_locs =(*concat)->m_locs;
1480 return true;
1483 /* Internal function. Canonicalize LOC into a form suitable for
1484 use as a key within the database, stripping away macro expansion,
1485 ad-hoc information, and range information, using the location of
1486 the start of LOC within an ordinary linemap. */
1488 location_t
1489 string_concat_db::get_key_loc (location_t loc)
1491 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1492 NULL);
1494 loc = get_range_from_loc (line_table, loc).m_start;
1496 return loc;
1499 /* Helper class for use within get_substring_ranges_for_loc.
1500 An vec of cpp_string with responsibility for releasing all of the
1501 str->text for each str in the vector. */
1503 class auto_cpp_string_vec : public auto_vec <cpp_string>
1505 public:
1506 auto_cpp_string_vec (int alloc)
1507 : auto_vec <cpp_string> (alloc) {}
1509 ~auto_cpp_string_vec ()
1511 /* Clean up the copies within this vec. */
1512 int i;
1513 cpp_string *str;
1514 FOR_EACH_VEC_ELT (*this, i, str)
1515 free (const_cast <unsigned char *> (str->text));
1519 /* Attempt to populate RANGES with source location information on the
1520 individual characters within the string literal found at STRLOC.
1521 If CONCATS is non-NULL, then any string literals that the token at
1522 STRLOC was concatenated with are also added to RANGES.
1524 Return NULL if successful, or an error message if any errors occurred (in
1525 which case RANGES may be only partially populated and should not
1526 be used).
1528 This is implemented by re-parsing the relevant source line(s). */
1530 static const char *
1531 get_substring_ranges_for_loc (cpp_reader *pfile,
1532 string_concat_db *concats,
1533 location_t strloc,
1534 enum cpp_ttype type,
1535 cpp_substring_ranges &ranges)
1537 gcc_assert (pfile);
1539 if (strloc == UNKNOWN_LOCATION)
1540 return "unknown location";
1542 /* Reparsing the strings requires accurate location information.
1543 If -ftrack-macro-expansion has been overridden from its default
1544 of 2, then we might have a location of a macro expansion point,
1545 rather than the location of the literal itself.
1546 Avoid this by requiring that we have full macro expansion tracking
1547 for substring locations to be available. */
1548 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1549 return "track_macro_expansion != 2";
1551 /* If #line or # 44 "file"-style directives are present, then there's
1552 no guarantee that the line numbers we have can be used to locate
1553 the strings. For example, we might have a .i file with # directives
1554 pointing back to lines within a .c file, but the .c file might
1555 have been edited since the .i file was created.
1556 In such a case, the safest course is to disable on-demand substring
1557 locations. */
1558 if (line_table->seen_line_directive)
1559 return "seen line directive";
1561 /* If string concatenation has occurred at STRLOC, get the locations
1562 of all of the literal tokens making up the compound string.
1563 Otherwise, just use STRLOC. */
1564 int num_locs = 1;
1565 location_t *strlocs = &strloc;
1566 if (concats)
1567 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1569 auto_cpp_string_vec strs (num_locs);
1570 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1571 for (int i = 0; i < num_locs; i++)
1573 /* Get range of strloc. We will use it to locate the start and finish
1574 of the literal token within the line. */
1575 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1577 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1579 /* If the string token was within a macro expansion, then we can
1580 cope with it for the simple case where we have a single token.
1581 Otherwise, bail out. */
1582 if (src_range.m_start != src_range.m_finish)
1583 return "macro expansion";
1585 else
1587 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1588 /* If so, we can't reliably determine where the token started within
1589 its line. */
1590 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1592 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1593 /* If so, we can't reliably determine where the token finished
1594 within its line. */
1595 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1598 expanded_location start
1599 = expand_location_to_spelling_point (src_range.m_start,
1600 LOCATION_ASPECT_START);
1601 expanded_location finish
1602 = expand_location_to_spelling_point (src_range.m_finish,
1603 LOCATION_ASPECT_FINISH);
1604 if (start.file != finish.file)
1605 return "range endpoints are in different files";
1606 if (start.line != finish.line)
1607 return "range endpoints are on different lines";
1608 if (start.column > finish.column)
1609 return "range endpoints are reversed";
1611 char_span line = location_get_source_line (start.file, start.line);
1612 if (!line)
1613 return "unable to read source line";
1615 /* Determine the location of the literal (including quotes
1616 and leading prefix chars, such as the 'u' in a u""
1617 token). */
1618 size_t literal_length = finish.column - start.column + 1;
1620 /* Ensure that we don't crash if we got the wrong location. */
1621 if (start.column < 1)
1622 return "zero start column";
1623 if (line.length () < (start.column - 1 + literal_length))
1624 return "line is not wide enough";
1626 char_span literal = line.subspan (start.column - 1, literal_length);
1628 cpp_string from;
1629 from.len = literal_length;
1630 /* Make a copy of the literal, to avoid having to rely on
1631 the lifetime of the copy of the line within the cache.
1632 This will be released by the auto_cpp_string_vec dtor. */
1633 from.text = (unsigned char *)literal.xstrdup ();
1634 strs.safe_push (from);
1636 /* For very long lines, a new linemap could have started
1637 halfway through the token.
1638 Ensure that the loc_reader uses the linemap of the
1639 *end* of the token for its start location. */
1640 const line_map_ordinary *start_ord_map;
1641 linemap_resolve_location (line_table, src_range.m_start,
1642 LRK_SPELLING_LOCATION, &start_ord_map);
1643 const line_map_ordinary *final_ord_map;
1644 linemap_resolve_location (line_table, src_range.m_finish,
1645 LRK_SPELLING_LOCATION, &final_ord_map);
1646 if (start_ord_map == NULL || final_ord_map == NULL)
1647 return "failed to get ordinary maps";
1648 /* Bulletproofing. We ought to only have different ordinary maps
1649 for start vs finish due to line-length jumps. */
1650 if (start_ord_map != final_ord_map
1651 && start_ord_map->to_file != final_ord_map->to_file)
1652 return "start and finish are spelled in different ordinary maps";
1653 /* The file from linemap_resolve_location ought to match that from
1654 expand_location_to_spelling_point. */
1655 if (start_ord_map->to_file != start.file)
1656 return "mismatching file after resolving linemap";
1658 location_t start_loc
1659 = linemap_position_for_line_and_column (line_table, final_ord_map,
1660 start.line, start.column);
1662 cpp_string_location_reader loc_reader (start_loc, line_table);
1663 loc_readers.safe_push (loc_reader);
1666 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1667 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1668 loc_readers.address (),
1669 num_locs, &ranges, type);
1670 if (err)
1671 return err;
1673 /* Success: "ranges" should now contain information on the string. */
1674 return NULL;
1677 /* Attempt to populate *OUT_LOC with source location information on the
1678 given characters within the string literal found at STRLOC.
1679 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1680 character set.
1682 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1683 and string literal "012345\n789"
1684 *OUT_LOC is written to with:
1685 "012345\n789"
1686 ~^~~~~
1688 If CONCATS is non-NULL, then any string literals that the token at
1689 STRLOC was concatenated with are also considered.
1691 This is implemented by re-parsing the relevant source line(s).
1693 Return NULL if successful, or an error message if any errors occurred.
1694 Error messages are intended for GCC developers (to help debugging) rather
1695 than for end-users. */
1697 const char *
1698 get_location_within_string (cpp_reader *pfile,
1699 string_concat_db *concats,
1700 location_t strloc,
1701 enum cpp_ttype type,
1702 int caret_idx, int start_idx, int end_idx,
1703 location_t *out_loc)
1705 gcc_checking_assert (caret_idx >= 0);
1706 gcc_checking_assert (start_idx >= 0);
1707 gcc_checking_assert (end_idx >= 0);
1708 gcc_assert (out_loc);
1710 cpp_substring_ranges ranges;
1711 const char *err
1712 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1713 if (err)
1714 return err;
1716 if (caret_idx >= ranges.get_num_ranges ())
1717 return "caret_idx out of range";
1718 if (start_idx >= ranges.get_num_ranges ())
1719 return "start_idx out of range";
1720 if (end_idx >= ranges.get_num_ranges ())
1721 return "end_idx out of range";
1723 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1724 ranges.get_range (start_idx).m_start,
1725 ranges.get_range (end_idx).m_finish);
1726 return NULL;
1729 #if CHECKING_P
1731 namespace selftest {
1733 /* Selftests of location handling. */
1735 /* Attempt to populate *OUT_RANGE with source location information on the
1736 given character within the string literal found at STRLOC.
1737 CHAR_IDX refers to an offset within the execution character set.
1738 If CONCATS is non-NULL, then any string literals that the token at
1739 STRLOC was concatenated with are also considered.
1741 This is implemented by re-parsing the relevant source line(s).
1743 Return NULL if successful, or an error message if any errors occurred.
1744 Error messages are intended for GCC developers (to help debugging) rather
1745 than for end-users. */
1747 static const char *
1748 get_source_range_for_char (cpp_reader *pfile,
1749 string_concat_db *concats,
1750 location_t strloc,
1751 enum cpp_ttype type,
1752 int char_idx,
1753 source_range *out_range)
1755 gcc_checking_assert (char_idx >= 0);
1756 gcc_assert (out_range);
1758 cpp_substring_ranges ranges;
1759 const char *err
1760 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1761 if (err)
1762 return err;
1764 if (char_idx >= ranges.get_num_ranges ())
1765 return "char_idx out of range";
1767 *out_range = ranges.get_range (char_idx);
1768 return NULL;
1771 /* As get_source_range_for_char, but write to *OUT the number
1772 of ranges that are available. */
1774 static const char *
1775 get_num_source_ranges_for_substring (cpp_reader *pfile,
1776 string_concat_db *concats,
1777 location_t strloc,
1778 enum cpp_ttype type,
1779 int *out)
1781 gcc_assert (out);
1783 cpp_substring_ranges ranges;
1784 const char *err
1785 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1787 if (err)
1788 return err;
1790 *out = ranges.get_num_ranges ();
1791 return NULL;
1794 /* Selftests of location handling. */
1796 /* Verify that compare() on linenum_type handles comparisons over the full
1797 range of the type. */
1799 static void
1800 test_linenum_comparisons ()
1802 linenum_type min_line (0);
1803 linenum_type max_line (0xffffffff);
1804 ASSERT_EQ (0, compare (min_line, min_line));
1805 ASSERT_EQ (0, compare (max_line, max_line));
1807 ASSERT_GT (compare (max_line, min_line), 0);
1808 ASSERT_LT (compare (min_line, max_line), 0);
1811 /* Helper function for verifying location data: when location_t
1812 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1813 as having column 0. */
1815 static bool
1816 should_have_column_data_p (location_t loc)
1818 if (IS_ADHOC_LOC (loc))
1819 loc = get_location_from_adhoc_loc (line_table, loc);
1820 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1821 return false;
1822 return true;
1825 /* Selftest for should_have_column_data_p. */
1827 static void
1828 test_should_have_column_data_p ()
1830 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1831 ASSERT_TRUE
1832 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1833 ASSERT_FALSE
1834 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1837 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1838 on LOC. */
1840 static void
1841 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1842 location_t loc)
1844 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1845 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1846 /* If location_t values are sufficiently high, then column numbers
1847 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1848 When close to the threshold, column numbers *may* be present: if
1849 the final linemap before the threshold contains a line that straddles
1850 the threshold, locations in that line have column information. */
1851 if (should_have_column_data_p (loc))
1852 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for location_t beyond which line-map.c changes
     behavior (disabling of the range-packing optimization, disabling
     of column-tracking).  We can exercise these by starting the
     line_table at interesting values at or near these thresholds.

   The following class describes a particular case within our test
   matrix.  */

class line_table_case
{
 public:
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* Value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* Starting value for the line_table's highest location; zero means
     the freshly initialized value is kept.  */
  int m_base_location;
};
1883 /* Constructor. Store the old value of line_table, and create a new
1884 one, using sane defaults. */
1886 line_table_test::line_table_test ()
1888 gcc_assert (saved_line_table == NULL);
1889 saved_line_table = line_table;
1890 line_table = ggc_alloc<line_maps> ();
1891 linemap_init (line_table, BUILTINS_LOCATION);
1892 gcc_assert (saved_line_table->reallocator);
1893 line_table->reallocator = saved_line_table->reallocator;
1894 gcc_assert (saved_line_table->round_alloc_size);
1895 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1896 line_table->default_range_bits = 0;
1899 /* Constructor. Store the old value of line_table, and create a new
1900 one, using the sitation described in CASE_. */
1902 line_table_test::line_table_test (const line_table_case &case_)
1904 gcc_assert (saved_line_table == NULL);
1905 saved_line_table = line_table;
1906 line_table = ggc_alloc<line_maps> ();
1907 linemap_init (line_table, BUILTINS_LOCATION);
1908 gcc_assert (saved_line_table->reallocator);
1909 line_table->reallocator = saved_line_table->reallocator;
1910 gcc_assert (saved_line_table->round_alloc_size);
1911 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1912 line_table->default_range_bits = case_.m_default_range_bits;
1913 if (case_.m_base_location)
1915 line_table->highest_location = case_.m_base_location;
1916 line_table->highest_line = case_.m_base_location;
1920 /* Destructor. Restore the old value of line_table. */
1922 line_table_test::~line_table_test ()
1924 gcc_assert (saved_line_table != NULL);
1925 line_table = saved_line_table;
1926 saved_line_table = NULL;
1929 /* Verify basic operation of ordinary linemaps. */
1931 static void
1932 test_accessing_ordinary_linemaps (const line_table_case &case_)
1934 line_table_test ltt (case_);
1936 /* Build a simple linemap describing some locations. */
1937 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1939 linemap_line_start (line_table, 1, 100);
1940 location_t loc_a = linemap_position_for_column (line_table, 1);
1941 location_t loc_b = linemap_position_for_column (line_table, 23);
1943 linemap_line_start (line_table, 2, 100);
1944 location_t loc_c = linemap_position_for_column (line_table, 1);
1945 location_t loc_d = linemap_position_for_column (line_table, 17);
1947 /* Example of a very long line. */
1948 linemap_line_start (line_table, 3, 2000);
1949 location_t loc_e = linemap_position_for_column (line_table, 700);
1951 /* Transitioning back to a short line. */
1952 linemap_line_start (line_table, 4, 0);
1953 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1955 if (should_have_column_data_p (loc_back_to_short))
1957 /* Verify that we switched to short lines in the linemap. */
1958 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1959 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1962 /* Example of a line that will eventually be seen to be longer
1963 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1964 below that. */
1965 linemap_line_start (line_table, 5, 2000);
1967 location_t loc_start_of_very_long_line
1968 = linemap_position_for_column (line_table, 2000);
1969 location_t loc_too_wide
1970 = linemap_position_for_column (line_table, 4097);
1971 location_t loc_too_wide_2
1972 = linemap_position_for_column (line_table, 4098);
1974 /* ...and back to a sane line length. */
1975 linemap_line_start (line_table, 6, 100);
1976 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1978 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1980 /* Multiple files. */
1981 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1982 linemap_line_start (line_table, 1, 200);
1983 location_t loc_f = linemap_position_for_column (line_table, 150);
1984 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1986 /* Verify that we can recover the location info. */
1987 assert_loceq ("foo.c", 1, 1, loc_a);
1988 assert_loceq ("foo.c", 1, 23, loc_b);
1989 assert_loceq ("foo.c", 2, 1, loc_c);
1990 assert_loceq ("foo.c", 2, 17, loc_d);
1991 assert_loceq ("foo.c", 3, 700, loc_e);
1992 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1994 /* In the very wide line, the initial location should be fully tracked. */
1995 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1996 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1997 be disabled. */
1998 assert_loceq ("foo.c", 5, 0, loc_too_wide);
1999 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
2000 /*...and column-tracking should be re-enabled for subsequent lines. */
2001 assert_loceq ("foo.c", 6, 10, loc_sane_again);
2003 assert_loceq ("bar.c", 1, 150, loc_f);
2005 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
2006 ASSERT_TRUE (pure_location_p (line_table, loc_a));
2008 /* Verify using make_location to build a range, and extracting data
2009 back from it. */
2010 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
2011 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
2012 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
2013 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
2014 ASSERT_EQ (loc_b, src_range.m_start);
2015 ASSERT_EQ (loc_d, src_range.m_finish);
2018 /* Verify various properties of UNKNOWN_LOCATION. */
2020 static void
2021 test_unknown_location ()
2023 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
2024 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
2025 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
2028 /* Verify various properties of BUILTINS_LOCATION. */
2030 static void
2031 test_builtins ()
2033 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
2034 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
2037 /* Regression test for make_location.
2038 Ensure that we use pure locations for the start/finish of the range,
2039 rather than storing a packed or ad-hoc range as the start/finish. */
2041 static void
2042 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
2044 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
2045 with C++ frontend.
2046 ....................0000000001111111111222.
2047 ....................1234567890123456789012. */
2048 const char *content = " r += !aaa == bbb;\n";
2049 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
2050 line_table_test ltt (case_);
2051 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
2053 const location_t c11 = linemap_position_for_column (line_table, 11);
2054 const location_t c12 = linemap_position_for_column (line_table, 12);
2055 const location_t c13 = linemap_position_for_column (line_table, 13);
2056 const location_t c14 = linemap_position_for_column (line_table, 14);
2057 const location_t c21 = linemap_position_for_column (line_table, 21);
2059 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
2060 return;
2062 /* Use column 13 for the caret location, arbitrarily, to verify that we
2063 handle start != caret. */
2064 const location_t aaa = make_location (c13, c12, c14);
2065 ASSERT_EQ (c13, get_pure_location (aaa));
2066 ASSERT_EQ (c12, get_start (aaa));
2067 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
2068 ASSERT_EQ (c14, get_finish (aaa));
2069 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
2071 /* Make a location using a location with a range as the start-point. */
2072 const location_t not_aaa = make_location (c11, aaa, c14);
2073 ASSERT_EQ (c11, get_pure_location (not_aaa));
2074 /* It should use the start location of the range, not store the range
2075 itself. */
2076 ASSERT_EQ (c12, get_start (not_aaa));
2077 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
2078 ASSERT_EQ (c14, get_finish (not_aaa));
2079 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
2081 /* Similarly, make a location with a range as the end-point. */
2082 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
2083 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
2084 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
2085 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
2086 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
2087 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
2088 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
2089 /* It should use the finish location of the range, not store the range
2090 itself. */
2091 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
2092 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
2093 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
2094 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
2095 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
2098 /* Verify reading of input files (e.g. for caret-based diagnostics). */
2100 static void
2101 test_reading_source_line ()
2103 /* Create a tempfile and write some text to it. */
2104 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
2105 "01234567890123456789\n"
2106 "This is the test text\n"
2107 "This is the 3rd line");
2109 /* Read back a specific line from the tempfile. */
2110 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
2111 ASSERT_TRUE (source_line);
2112 ASSERT_TRUE (source_line.get_buffer () != NULL);
2113 ASSERT_EQ (20, source_line.length ());
2114 ASSERT_TRUE (!strncmp ("This is the 3rd line",
2115 source_line.get_buffer (), source_line.length ()));
2117 source_line = location_get_source_line (tmp.get_filename (), 2);
2118 ASSERT_TRUE (source_line);
2119 ASSERT_TRUE (source_line.get_buffer () != NULL);
2120 ASSERT_EQ (21, source_line.length ());
2121 ASSERT_TRUE (!strncmp ("This is the test text",
2122 source_line.get_buffer (), source_line.length ()));
2124 source_line = location_get_source_line (tmp.get_filename (), 4);
2125 ASSERT_FALSE (source_line);
2126 ASSERT_TRUE (source_line.get_buffer () == NULL);
/* Tests of lexing.  */

/* Assert that spelling token TOK with cpp_token_as_text (using PARSER)
   gives exactly the string EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    const char *spelling_                                               \
      = (const char *) cpp_token_as_text ((PARSER), (TOK));             \
    ASSERT_STREQ ((EXPECTED_TEXT), spelling_);                          \
  SELFTEST_END_STMT
2140 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
2141 and ranges from EXP_START_COL to EXP_FINISH_COL.
2142 Use LOC as the effective location of the selftest. */
2144 static void
2145 assert_token_loc_eq (const location &loc,
2146 const cpp_token *tok,
2147 const char *exp_filename, int exp_linenum,
2148 int exp_start_col, int exp_finish_col)
2150 location_t tok_loc = tok->src_loc;
2151 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
2152 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
2154 /* If location_t values are sufficiently high, then column numbers
2155 will be unavailable. */
2156 if (!should_have_column_data_p (tok_loc))
2157 return;
2159 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2160 source_range tok_range = get_range_from_loc (line_table, tok_loc);
2161 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2162 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
2165 /* Use assert_token_loc_eq to verify the TOK->src_loc, using
2166 SELFTEST_LOCATION as the effective location of the selftest. */
2168 #define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
2169 EXP_START_COL, EXP_FINISH_COL) \
2170 assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
2171 (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
2173 /* Test of lexing a file using libcpp, verifying tokens and their
2174 location information. */
2176 static void
2177 test_lexer (const line_table_case &case_)
2179 /* Create a tempfile and write some text to it. */
2180 const char *content =
2181 /*00000000011111111112222222222333333.3333444444444.455555555556
2182 12345678901234567890123456789012345.6789012345678.901234567890. */
2183 ("test_name /* c-style comment */\n"
2184 " \"test literal\"\n"
2185 " // test c++-style comment\n"
2186 " 42\n");
2187 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2189 line_table_test ltt (case_);
2191 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2193 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2194 ASSERT_NE (fname, NULL);
2196 /* Verify that we get the expected tokens back, with the correct
2197 location information. */
2199 location_t loc;
2200 const cpp_token *tok;
2201 tok = cpp_get_token_with_location (parser, &loc);
2202 ASSERT_NE (tok, NULL);
2203 ASSERT_EQ (tok->type, CPP_NAME);
2204 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2205 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2207 tok = cpp_get_token_with_location (parser, &loc);
2208 ASSERT_NE (tok, NULL);
2209 ASSERT_EQ (tok->type, CPP_STRING);
2210 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2211 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2213 tok = cpp_get_token_with_location (parser, &loc);
2214 ASSERT_NE (tok, NULL);
2215 ASSERT_EQ (tok->type, CPP_NUMBER);
2216 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2217 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2219 tok = cpp_get_token_with_location (parser, &loc);
2220 ASSERT_NE (tok, NULL);
2221 ASSERT_EQ (tok->type, CPP_EOF);
2223 cpp_finish (parser, NULL);
2224 cpp_destroy (parser);
/* Forward decls.  */

class lexer_test;
class lexer_test_options;

/* Abstract hook for customizing a lexer_test.
   The lexer_test constructor invokes the "apply" vfunc on the options
   object it is given (if any), passing itself as the argument.  */

class lexer_test_options
{
public:
  virtual void apply (lexer_test &) = 0;
};
2241 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2242 in its dtor.
2244 This is needed by struct lexer_test to ensure that the cleanup of the
2245 cpp_reader happens *after* the cleanup of the temp_source_file. */
2247 class cpp_reader_ptr
2249 public:
2250 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2252 ~cpp_reader_ptr ()
2254 cpp_finish (m_ptr, NULL);
2255 cpp_destroy (m_ptr);
2258 operator cpp_reader * () const { return m_ptr; }
2260 private:
2261 cpp_reader *m_ptr;
2264 /* A struct for writing lexer tests. */
2266 class lexer_test
2268 public:
2269 lexer_test (const line_table_case &case_, const char *content,
2270 lexer_test_options *options);
2271 ~lexer_test ();
2273 const cpp_token *get_token ();
2275 /* The ordering of these fields matters.
2276 The line_table_test must be first, since the cpp_reader_ptr
2277 uses it.
2278 The cpp_reader must be cleaned up *after* the temp_source_file
2279 since the filenames in input.c's input cache are owned by the
2280 cpp_reader; in particular, when ~temp_source_file evicts the
2281 filename the filenames must still be alive. */
2282 line_table_test m_ltt;
2283 cpp_reader_ptr m_parser;
2284 temp_source_file m_tempfile;
2285 string_concat_db m_concats;
2286 bool m_implicitly_expect_EOF;
2289 /* Use an EBCDIC encoding for the execution charset, specifically
2290 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2292 This exercises iconv integration within libcpp.
2293 Not every build of iconv supports the given charset,
2294 so we need to flag this error and handle it gracefully. */
2296 class ebcdic_execution_charset : public lexer_test_options
2298 public:
2299 ebcdic_execution_charset () : m_num_iconv_errors (0)
2301 gcc_assert (s_singleton == NULL);
2302 s_singleton = this;
2304 ~ebcdic_execution_charset ()
2306 gcc_assert (s_singleton == this);
2307 s_singleton = NULL;
2310 void apply (lexer_test &test) FINAL OVERRIDE
2312 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2313 cpp_opts->narrow_charset = "IBM1047";
2315 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2316 callbacks->diagnostic = on_diagnostic;
2319 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2320 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2321 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2322 rich_location *richloc ATTRIBUTE_UNUSED,
2323 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2324 ATTRIBUTE_FPTR_PRINTF(5,0)
2326 gcc_assert (s_singleton);
2327 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2328 const char *msg = "conversion from %s to %s not supported by iconv";
2329 #ifdef ENABLE_NLS
2330 msg = dgettext ("cpplib", msg);
2331 #endif
2332 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2333 when the local iconv build doesn't support the conversion. */
2334 if (strcmp (msgid, msg) == 0)
2336 s_singleton->m_num_iconv_errors++;
2337 return true;
2340 /* Otherwise, we have an unexpected error. */
2341 abort ();
2344 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2346 private:
2347 static ebcdic_execution_charset *s_singleton;
2348 int m_num_iconv_errors;
2351 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2353 /* A lexer_test_options subclass that records a list of diagnostic
2354 messages emitted by the lexer. */
2356 class lexer_diagnostic_sink : public lexer_test_options
2358 public:
2359 lexer_diagnostic_sink ()
2361 gcc_assert (s_singleton == NULL);
2362 s_singleton = this;
2364 ~lexer_diagnostic_sink ()
2366 gcc_assert (s_singleton == this);
2367 s_singleton = NULL;
2369 int i;
2370 char *str;
2371 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2372 free (str);
2375 void apply (lexer_test &test) FINAL OVERRIDE
2377 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2378 callbacks->diagnostic = on_diagnostic;
2381 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2382 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2383 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2384 rich_location *richloc ATTRIBUTE_UNUSED,
2385 const char *msgid, va_list *ap)
2386 ATTRIBUTE_FPTR_PRINTF(5,0)
2388 char *msg = xvasprintf (msgid, *ap);
2389 s_singleton->m_diagnostics.safe_push (msg);
2390 return true;
2393 auto_vec<char *> m_diagnostics;
2395 private:
2396 static lexer_diagnostic_sink *s_singleton;
2399 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2401 /* Constructor. Override line_table with a new instance based on CASE_,
2402 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2403 start parsing the tempfile. */
2405 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2406 lexer_test_options *options)
2407 : m_ltt (case_),
2408 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2409 /* Create a tempfile and write the text to it. */
2410 m_tempfile (SELFTEST_LOCATION, ".c", content),
2411 m_concats (),
2412 m_implicitly_expect_EOF (true)
2414 if (options)
2415 options->apply (*this);
2417 cpp_init_iconv (m_parser);
2419 /* Parse the file. */
2420 const char *fname = cpp_read_main_file (m_parser,
2421 m_tempfile.get_filename ());
2422 ASSERT_NE (fname, NULL);
2425 /* Destructor. By default, verify that the next token in m_parser is EOF. */
2427 lexer_test::~lexer_test ()
2429 location_t loc;
2430 const cpp_token *tok;
2432 if (m_implicitly_expect_EOF)
2434 tok = cpp_get_token_with_location (m_parser, &loc);
2435 ASSERT_NE (tok, NULL);
2436 ASSERT_EQ (tok->type, CPP_EOF);
2440 /* Get the next token from m_parser. */
2442 const cpp_token *
2443 lexer_test::get_token ()
2445 location_t loc;
2446 const cpp_token *tok;
2448 tok = cpp_get_token_with_location (m_parser, &loc);
2449 ASSERT_NE (tok, NULL);
2450 return tok;
2453 /* Verify that locations within string literals are correctly handled. */
2455 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2456 using the string concatenation database for TEST.
2458 Assert that the character at index IDX is on EXPECTED_LINE,
2459 and that it begins at column EXPECTED_START_COL and ends at
2460 EXPECTED_FINISH_COL (unless the locations are beyond
2461 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2462 columns). */
2464 static void
2465 assert_char_at_range (const location &loc,
2466 lexer_test& test,
2467 location_t strloc, enum cpp_ttype type, int idx,
2468 int expected_line, int expected_start_col,
2469 int expected_finish_col)
2471 cpp_reader *pfile = test.m_parser;
2472 string_concat_db *concats = &test.m_concats;
2474 source_range actual_range = source_range();
2475 const char *err
2476 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2477 &actual_range);
2478 if (should_have_column_data_p (strloc))
2479 ASSERT_EQ_AT (loc, NULL, err);
2480 else
2482 ASSERT_STREQ_AT (loc,
2483 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2484 err);
2485 return;
2488 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2489 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2490 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2491 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2493 if (should_have_column_data_p (actual_range.m_start))
2495 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2496 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2498 if (should_have_column_data_p (actual_range.m_finish))
2500 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2501 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2505 /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2506 the effective location of any errors. */
2508 #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2509 EXPECTED_START_COL, EXPECTED_FINISH_COL) \
2510 assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2511 (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2512 (EXPECTED_FINISH_COL))
2514 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2515 using the string concatenation database for TEST.
2517 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2519 static void
2520 assert_num_substring_ranges (const location &loc,
2521 lexer_test& test,
2522 location_t strloc,
2523 enum cpp_ttype type,
2524 int expected_num_ranges)
2526 cpp_reader *pfile = test.m_parser;
2527 string_concat_db *concats = &test.m_concats;
2529 int actual_num_ranges = -1;
2530 const char *err
2531 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2532 &actual_num_ranges);
2533 if (should_have_column_data_p (strloc))
2534 ASSERT_EQ_AT (loc, NULL, err);
2535 else
2537 ASSERT_STREQ_AT (loc,
2538 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2539 err);
2540 return;
2542 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2545 /* Macro for calling assert_num_substring_ranges, supplying
2546 SELFTEST_LOCATION for the effective location of any errors. */
2548 #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2549 EXPECTED_NUM_RANGES) \
2550 assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2551 (TYPE), (EXPECTED_NUM_RANGES))
2554 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2555 returns an error (using the string concatenation database for TEST). */
2557 static void
2558 assert_has_no_substring_ranges (const location &loc,
2559 lexer_test& test,
2560 location_t strloc,
2561 enum cpp_ttype type,
2562 const char *expected_err)
2564 cpp_reader *pfile = test.m_parser;
2565 string_concat_db *concats = &test.m_concats;
2566 cpp_substring_ranges ranges;
2567 const char *actual_err
2568 = get_substring_ranges_for_loc (pfile, concats, strloc,
2569 type, ranges);
2570 if (should_have_column_data_p (strloc))
2571 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2572 else
2573 ASSERT_STREQ_AT (loc,
2574 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2575 actual_err);
2578 #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
2579 assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2580 (STRLOC), (TYPE), (ERR))
2582 /* Lex a simple string literal. Verify the substring location data, before
2583 and after running cpp_interpret_string on it. */
2585 static void
2586 test_lexer_string_locations_simple (const line_table_case &case_)
2588 /* Digits 0-9 (with 0 at column 10), the simple way.
2589 ....................000000000.11111111112.2222222223333333333
2590 ....................123456789.01234567890.1234567890123456789
2591 We add a trailing comment to ensure that we correctly locate
2592 the end of the string literal token. */
2593 const char *content = " \"0123456789\" /* not a string */\n";
2594 lexer_test test (case_, content, NULL);
2596 /* Verify that we get the expected token back, with the correct
2597 location information. */
2598 const cpp_token *tok = test.get_token ();
2599 ASSERT_EQ (tok->type, CPP_STRING);
2600 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2601 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2603 /* At this point in lexing, the quote characters are treated as part of
2604 the string (they are stripped off by cpp_interpret_string). */
2606 ASSERT_EQ (tok->val.str.len, 12);
2608 /* Verify that cpp_interpret_string works. */
2609 cpp_string dst_string;
2610 const enum cpp_ttype type = CPP_STRING;
2611 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2612 &dst_string, type);
2613 ASSERT_TRUE (result);
2614 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2615 free (const_cast <unsigned char *> (dst_string.text));
2617 /* Verify ranges of individual characters. This no longer includes the
2618 opening quote, but does include the closing quote. */
2619 for (int i = 0; i <= 10; i++)
2620 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2621 10 + i, 10 + i);
2623 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2626 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2627 encoding. */
2629 static void
2630 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2632 /* EBCDIC support requires iconv. */
2633 if (!HAVE_ICONV)
2634 return;
2636 /* Digits 0-9 (with 0 at column 10), the simple way.
2637 ....................000000000.11111111112.2222222223333333333
2638 ....................123456789.01234567890.1234567890123456789
2639 We add a trailing comment to ensure that we correctly locate
2640 the end of the string literal token. */
2641 const char *content = " \"0123456789\" /* not a string */\n";
2642 ebcdic_execution_charset use_ebcdic;
2643 lexer_test test (case_, content, &use_ebcdic);
2645 /* Verify that we get the expected token back, with the correct
2646 location information. */
2647 const cpp_token *tok = test.get_token ();
2648 ASSERT_EQ (tok->type, CPP_STRING);
2649 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2650 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2652 /* At this point in lexing, the quote characters are treated as part of
2653 the string (they are stripped off by cpp_interpret_string). */
2655 ASSERT_EQ (tok->val.str.len, 12);
2657 /* The remainder of the test requires an iconv implementation that
2658 can convert from UTF-8 to the EBCDIC encoding requested above. */
2659 if (use_ebcdic.iconv_errors_occurred_p ())
2660 return;
2662 /* Verify that cpp_interpret_string works. */
2663 cpp_string dst_string;
2664 const enum cpp_ttype type = CPP_STRING;
2665 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2666 &dst_string, type);
2667 ASSERT_TRUE (result);
2668 /* We should now have EBCDIC-encoded text, specifically
2669 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2670 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2671 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2672 (const char *)dst_string.text);
2673 free (const_cast <unsigned char *> (dst_string.text));
2675 /* Verify that we don't attempt to record substring location information
2676 for such cases. */
2677 ASSERT_HAS_NO_SUBSTRING_RANGES
2678 (test, tok->src_loc, type,
2679 "execution character set != source character set");
2682 /* Lex a string literal containing a hex-escaped character.
2683 Verify the substring location data, before and after running
2684 cpp_interpret_string on it. */
2686 static void
2687 test_lexer_string_locations_hex (const line_table_case &case_)
2689 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2690 and with a space in place of digit 6, to terminate the escaped
2691 hex code.
2692 ....................000000000.111111.11112222.
2693 ....................123456789.012345.67890123. */
2694 const char *content = " \"01234\\x35 789\"\n";
2695 lexer_test test (case_, content, NULL);
2697 /* Verify that we get the expected token back, with the correct
2698 location information. */
2699 const cpp_token *tok = test.get_token ();
2700 ASSERT_EQ (tok->type, CPP_STRING);
2701 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2702 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2704 /* At this point in lexing, the quote characters are treated as part of
2705 the string (they are stripped off by cpp_interpret_string). */
2706 ASSERT_EQ (tok->val.str.len, 15);
2708 /* Verify that cpp_interpret_string works. */
2709 cpp_string dst_string;
2710 const enum cpp_ttype type = CPP_STRING;
2711 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2712 &dst_string, type);
2713 ASSERT_TRUE (result);
2714 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2715 free (const_cast <unsigned char *> (dst_string.text));
2717 /* Verify ranges of individual characters. This no longer includes the
2718 opening quote, but does include the closing quote. */
2719 for (int i = 0; i <= 4; i++)
2720 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2721 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2722 for (int i = 6; i <= 10; i++)
2723 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2725 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2728 /* Lex a string literal containing an octal-escaped character.
2729 Verify the substring location data after running cpp_interpret_string
2730 on it. */
2732 static void
2733 test_lexer_string_locations_oct (const line_table_case &case_)
2735 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2736 and with a space in place of digit 6, to terminate the escaped
2737 octal code.
2738 ....................000000000.111111.11112222.2222223333333333444
2739 ....................123456789.012345.67890123.4567890123456789012 */
2740 const char *content = " \"01234\\065 789\" /* not a string */\n";
2741 lexer_test test (case_, content, NULL);
2743 /* Verify that we get the expected token back, with the correct
2744 location information. */
2745 const cpp_token *tok = test.get_token ();
2746 ASSERT_EQ (tok->type, CPP_STRING);
2747 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2749 /* Verify that cpp_interpret_string works. */
2750 cpp_string dst_string;
2751 const enum cpp_ttype type = CPP_STRING;
2752 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2753 &dst_string, type);
2754 ASSERT_TRUE (result);
2755 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2756 free (const_cast <unsigned char *> (dst_string.text));
2758 /* Verify ranges of individual characters. This no longer includes the
2759 opening quote, but does include the closing quote. */
2760 for (int i = 0; i < 5; i++)
2761 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2762 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2763 for (int i = 6; i <= 10; i++)
2764 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2766 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2769 /* Test of string literal containing letter escapes. */
2771 static void
2772 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2774 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2775 .....................000000000.1.11111.1.1.11222.22222223333333
2776 .....................123456789.0.12345.6.7.89012.34567890123456. */
2777 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2778 lexer_test test (case_, content, NULL);
2780 /* Verify that we get the expected tokens back. */
2781 const cpp_token *tok = test.get_token ();
2782 ASSERT_EQ (tok->type, CPP_STRING);
2783 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2785 /* Verify ranges of individual characters. */
2786 /* "\t". */
2787 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2788 0, 1, 10, 11);
2789 /* "foo". */
2790 for (int i = 1; i <= 3; i++)
2791 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2792 i, 1, 11 + i, 11 + i);
2793 /* "\\" and "\n". */
2794 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2795 4, 1, 15, 16);
2796 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2797 5, 1, 17, 18);
2799 /* "bar" and closing quote for nul-terminator. */
2800 for (int i = 6; i <= 9; i++)
2801 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2802 i, 1, 13 + i, 13 + i);
2804 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2807 /* Another test of a string literal containing a letter escape.
2808 Based on string seen in
2809 printf ("%-%\n");
2810 in gcc.dg/format/c90-printf-1.c. */
2812 static void
2813 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2815 /* .....................000000000.1111.11.1111.22222222223.
2816 .....................123456789.0123.45.6789.01234567890. */
2817 const char *content = (" \"%-%\\n\" /* non-str */\n");
2818 lexer_test test (case_, content, NULL);
2820 /* Verify that we get the expected tokens back. */
2821 const cpp_token *tok = test.get_token ();
2822 ASSERT_EQ (tok->type, CPP_STRING);
2823 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2825 /* Verify ranges of individual characters. */
2826 /* "%-%". */
2827 for (int i = 0; i < 3; i++)
2828 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2829 i, 1, 10 + i, 10 + i);
2830 /* "\n". */
2831 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2832 3, 1, 13, 14);
2834 /* Closing quote for nul-terminator. */
2835 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2836 4, 1, 15, 15);
2838 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2841 /* Lex a string literal containing UCN 4 characters.
2842 Verify the substring location data after running cpp_interpret_string
2843 on it. */
2845 static void
2846 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2848 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2849 as UCN 4.
2850 ....................000000000.111111.111122.222222223.33333333344444
2851 ....................123456789.012345.678901.234567890.12345678901234 */
2852 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2853 lexer_test test (case_, content, NULL);
2855 /* Verify that we get the expected token back, with the correct
2856 location information. */
2857 const cpp_token *tok = test.get_token ();
2858 ASSERT_EQ (tok->type, CPP_STRING);
2859 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2861 /* Verify that cpp_interpret_string works.
2862 The string should be encoded in the execution character
2863 set. Assuming that is UTF-8, we should have the following:
2864 ----------- ---- ----- ------- ----------------
2865 Byte offset Byte Octal Unicode Source Column(s)
2866 ----------- ---- ----- ------- ----------------
2867 0 0x30 '0' 10
2868 1 0x31 '1' 11
2869 2 0x32 '2' 12
2870 3 0x33 '3' 13
2871 4 0x34 '4' 14
2872 5 0xE2 \342 U+2174 15-20
2873 6 0x85 \205 (cont) 15-20
2874 7 0xB4 \264 (cont) 15-20
2875 8 0xE2 \342 U+2175 21-26
2876 9 0x85 \205 (cont) 21-26
2877 10 0xB5 \265 (cont) 21-26
2878 11 0x37 '7' 27
2879 12 0x38 '8' 28
2880 13 0x39 '9' 29
2881 14 0x00 30 (closing quote)
2882 ----------- ---- ----- ------- ---------------. */
2884 cpp_string dst_string;
2885 const enum cpp_ttype type = CPP_STRING;
2886 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2887 &dst_string, type);
2888 ASSERT_TRUE (result);
2889 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2890 (const char *)dst_string.text);
2891 free (const_cast <unsigned char *> (dst_string.text));
2893 /* Verify ranges of individual characters. This no longer includes the
2894 opening quote, but does include the closing quote.
2895 '01234'. */
2896 for (int i = 0; i <= 4; i++)
2897 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2898 /* U+2174. */
2899 for (int i = 5; i <= 7; i++)
2900 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2901 /* U+2175. */
2902 for (int i = 8; i <= 10; i++)
2903 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2904 /* '789' and nul terminator */
2905 for (int i = 11; i <= 14; i++)
2906 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2908 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2911 /* Lex a string literal containing UCN 8 characters.
2912 Verify the substring location data after running cpp_interpret_string
2913 on it. */
2915 static void
2916 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2918 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2919 ....................000000000.111111.1111222222.2222333333333.344444
2920 ....................123456789.012345.6789012345.6789012345678.901234 */
2921 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2922 lexer_test test (case_, content, NULL);
2924 /* Verify that we get the expected token back, with the correct
2925 location information. */
2926 const cpp_token *tok = test.get_token ();
2927 ASSERT_EQ (tok->type, CPP_STRING);
2928 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2929 "\"01234\\U00002174\\U00002175789\"");
2931 /* Verify that cpp_interpret_string works.
2932 The UTF-8 encoding of the string is identical to that from
2933 the ucn4 testcase above; the only difference is the column
2934 locations. */
2935 cpp_string dst_string;
2936 const enum cpp_ttype type = CPP_STRING;
2937 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2938 &dst_string, type);
2939 ASSERT_TRUE (result);
2940 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2941 (const char *)dst_string.text);
2942 free (const_cast <unsigned char *> (dst_string.text));
2944 /* Verify ranges of individual characters. This no longer includes the
2945 opening quote, but does include the closing quote.
2946 '01234'. */
2947 for (int i = 0; i <= 4; i++)
2948 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2949 /* U+2174. */
2950 for (int i = 5; i <= 7; i++)
2951 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2952 /* U+2175. */
2953 for (int i = 8; i <= 10; i++)
2954 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2955 /* '789' at columns 35-37 */
2956 for (int i = 11; i <= 13; i++)
2957 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2958 /* Closing quote/nul-terminator at column 38. */
2959 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2961 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
/* Read the four bytes at PTR_BE_VALUE as a big-endian (most significant
   byte first) 32-bit quantity and return it as a host-order value.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *octets
    = reinterpret_cast <const unsigned char *> (ptr_be_value);

  /* Shift each successive byte in below the ones already read.  */
  uint32_t host_value = 0;
  for (int k = 0; k < 4; k++)
    host_value = (host_value << 8) | (uint32_t) octets[k];
  return host_value;
}

2976 /* Lex a wide string literal and verify that attempts to read substring
2977 location data from it fail gracefully. */
2979 static void
2980 test_lexer_string_locations_wide_string (const line_table_case &case_)
2982 /* Digits 0-9.
2983 ....................000000000.11111111112.22222222233333
2984 ....................123456789.01234567890.12345678901234 */
2985 const char *content = " L\"0123456789\" /* non-str */\n";
2986 lexer_test test (case_, content, NULL);
2988 /* Verify that we get the expected token back, with the correct
2989 location information. */
2990 const cpp_token *tok = test.get_token ();
2991 ASSERT_EQ (tok->type, CPP_WSTRING);
2992 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2994 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2995 cpp_string dst_string;
2996 const enum cpp_ttype type = CPP_WSTRING;
2997 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2998 &dst_string, type);
2999 ASSERT_TRUE (result);
3000 /* The cpp_reader defaults to big-endian with
3001 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
3002 now be encoded as UTF-32BE. */
3003 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3004 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3005 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3006 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3007 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3008 free (const_cast <unsigned char *> (dst_string.text));
3010 /* We don't yet support generating substring location information
3011 for L"" strings. */
3012 ASSERT_HAS_NO_SUBSTRING_RANGES
3013 (test, tok->src_loc, type,
3014 "execution character set != source character set");
/* Read the two bytes at PTR_BE_VALUE as a big-endian (most significant
   byte first) 16-bit quantity and return it as a host-order value.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *octets
    = reinterpret_cast <const unsigned char *> (ptr_be_value);
  const unsigned int high_byte = octets[0];
  const unsigned int low_byte = octets[1];
  return static_cast <uint16_t> ((high_byte << 8) | low_byte);
}

3026 /* Lex a u"" string literal and verify that attempts to read substring
3027 location data from it fail gracefully. */
3029 static void
3030 test_lexer_string_locations_string16 (const line_table_case &case_)
3032 /* Digits 0-9.
3033 ....................000000000.11111111112.22222222233333
3034 ....................123456789.01234567890.12345678901234 */
3035 const char *content = " u\"0123456789\" /* non-str */\n";
3036 lexer_test test (case_, content, NULL);
3038 /* Verify that we get the expected token back, with the correct
3039 location information. */
3040 const cpp_token *tok = test.get_token ();
3041 ASSERT_EQ (tok->type, CPP_STRING16);
3042 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
3044 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
3045 cpp_string dst_string;
3046 const enum cpp_ttype type = CPP_STRING16;
3047 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3048 &dst_string, type);
3049 ASSERT_TRUE (result);
3051 /* The cpp_reader defaults to big-endian, so dst_string should
3052 now be encoded as UTF-16BE. */
3053 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
3054 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
3055 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
3056 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
3057 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
3058 free (const_cast <unsigned char *> (dst_string.text));
3060 /* We don't yet support generating substring location information
3061 for L"" strings. */
3062 ASSERT_HAS_NO_SUBSTRING_RANGES
3063 (test, tok->src_loc, type,
3064 "execution character set != source character set");
3067 /* Lex a U"" string literal and verify that attempts to read substring
3068 location data from it fail gracefully. */
3070 static void
3071 test_lexer_string_locations_string32 (const line_table_case &case_)
3073 /* Digits 0-9.
3074 ....................000000000.11111111112.22222222233333
3075 ....................123456789.01234567890.12345678901234 */
3076 const char *content = " U\"0123456789\" /* non-str */\n";
3077 lexer_test test (case_, content, NULL);
3079 /* Verify that we get the expected token back, with the correct
3080 location information. */
3081 const cpp_token *tok = test.get_token ();
3082 ASSERT_EQ (tok->type, CPP_STRING32);
3083 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
3085 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
3086 cpp_string dst_string;
3087 const enum cpp_ttype type = CPP_STRING32;
3088 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3089 &dst_string, type);
3090 ASSERT_TRUE (result);
3092 /* The cpp_reader defaults to big-endian, so dst_string should
3093 now be encoded as UTF-32BE. */
3094 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3095 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3096 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3097 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3098 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3099 free (const_cast <unsigned char *> (dst_string.text));
3101 /* We don't yet support generating substring location information
3102 for L"" strings. */
3103 ASSERT_HAS_NO_SUBSTRING_RANGES
3104 (test, tok->src_loc, type,
3105 "execution character set != source character set");
3108 /* Lex a u8-string literal.
3109 Verify the substring location data after running cpp_interpret_string
3110 on it. */
3112 static void
3113 test_lexer_string_locations_u8 (const line_table_case &case_)
3115 /* Digits 0-9.
3116 ....................000000000.11111111112.22222222233333
3117 ....................123456789.01234567890.12345678901234 */
3118 const char *content = " u8\"0123456789\" /* non-str */\n";
3119 lexer_test test (case_, content, NULL);
3121 /* Verify that we get the expected token back, with the correct
3122 location information. */
3123 const cpp_token *tok = test.get_token ();
3124 ASSERT_EQ (tok->type, CPP_UTF8STRING);
3125 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
3127 /* Verify that cpp_interpret_string works. */
3128 cpp_string dst_string;
3129 const enum cpp_ttype type = CPP_STRING;
3130 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3131 &dst_string, type);
3132 ASSERT_TRUE (result);
3133 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3134 free (const_cast <unsigned char *> (dst_string.text));
3136 /* Verify ranges of individual characters. This no longer includes the
3137 opening quote, but does include the closing quote. */
3138 for (int i = 0; i <= 10; i++)
3139 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3142 /* Lex a string literal containing UTF-8 source characters.
3143 Verify the substring location data after running cpp_interpret_string
3144 on it. */
3146 static void
3147 test_lexer_string_locations_utf8_source (const line_table_case &case_)
3149 /* This string literal is written out to the source file as UTF-8,
3150 and is of the form "before mojibake after", where "mojibake"
3151 is written as the following four unicode code points:
3152 U+6587 CJK UNIFIED IDEOGRAPH-6587
3153 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3154 U+5316 CJK UNIFIED IDEOGRAPH-5316
3155 U+3051 HIRAGANA LETTER KE.
3156 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3157 "before" and "after" are 1 byte per unicode character.
3159 The numbering shown are "columns", which are *byte* numbers within
3160 the line, rather than unicode character numbers.
3162 .................... 000000000.1111111.
3163 .................... 123456789.0123456. */
3164 const char *content = (" \"before "
3165 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3166 UTF-8: 0xE6 0x96 0x87
3167 C octal escaped UTF-8: \346\226\207
3168 "column" numbers: 17-19. */
3169 "\346\226\207"
3171 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3172 UTF-8: 0xE5 0xAD 0x97
3173 C octal escaped UTF-8: \345\255\227
3174 "column" numbers: 20-22. */
3175 "\345\255\227"
3177 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3178 UTF-8: 0xE5 0x8C 0x96
3179 C octal escaped UTF-8: \345\214\226
3180 "column" numbers: 23-25. */
3181 "\345\214\226"
3183 /* U+3051 HIRAGANA LETTER KE
3184 UTF-8: 0xE3 0x81 0x91
3185 C octal escaped UTF-8: \343\201\221
3186 "column" numbers: 26-28. */
3187 "\343\201\221"
3189 /* column numbers 29 onwards
3190 2333333.33334444444444
3191 9012345.67890123456789. */
3192 " after\" /* non-str */\n");
3193 lexer_test test (case_, content, NULL);
3195 /* Verify that we get the expected token back, with the correct
3196 location information. */
3197 const cpp_token *tok = test.get_token ();
3198 ASSERT_EQ (tok->type, CPP_STRING);
3199 ASSERT_TOKEN_AS_TEXT_EQ
3200 (test.m_parser, tok,
3201 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3203 /* Verify that cpp_interpret_string works. */
3204 cpp_string dst_string;
3205 const enum cpp_ttype type = CPP_STRING;
3206 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3207 &dst_string, type);
3208 ASSERT_TRUE (result);
3209 ASSERT_STREQ
3210 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3211 (const char *)dst_string.text);
3212 free (const_cast <unsigned char *> (dst_string.text));
3214 /* Verify ranges of individual characters. This no longer includes the
3215 opening quote, but does include the closing quote.
3216 Assuming that both source and execution encodings are UTF-8, we have
3217 a run of 25 octets in each, plus the NUL terminator. */
3218 for (int i = 0; i < 25; i++)
3219 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3220 /* NUL-terminator should use the closing quote at column 35. */
3221 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3223 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3226 /* Test of string literal concatenation. */
3228 static void
3229 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3231 /* Digits 0-9.
3232 .....................000000000.111111.11112222222222
3233 .....................123456789.012345.67890123456789. */
3234 const char *content = (" \"01234\" /* non-str */\n"
3235 " \"56789\" /* non-str */\n");
3236 lexer_test test (case_, content, NULL);
3238 location_t input_locs[2];
3240 /* Verify that we get the expected tokens back. */
3241 auto_vec <cpp_string> input_strings;
3242 const cpp_token *tok_a = test.get_token ();
3243 ASSERT_EQ (tok_a->type, CPP_STRING);
3244 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3245 input_strings.safe_push (tok_a->val.str);
3246 input_locs[0] = tok_a->src_loc;
3248 const cpp_token *tok_b = test.get_token ();
3249 ASSERT_EQ (tok_b->type, CPP_STRING);
3250 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3251 input_strings.safe_push (tok_b->val.str);
3252 input_locs[1] = tok_b->src_loc;
3254 /* Verify that cpp_interpret_string works. */
3255 cpp_string dst_string;
3256 const enum cpp_ttype type = CPP_STRING;
3257 bool result = cpp_interpret_string (test.m_parser,
3258 input_strings.address (), 2,
3259 &dst_string, type);
3260 ASSERT_TRUE (result);
3261 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3262 free (const_cast <unsigned char *> (dst_string.text));
3264 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3265 test.m_concats.record_string_concatenation (2, input_locs);
3267 location_t initial_loc = input_locs[0];
3269 /* "01234" on line 1. */
3270 for (int i = 0; i <= 4; i++)
3271 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3272 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3273 for (int i = 5; i <= 10; i++)
3274 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3276 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3279 /* Another test of string literal concatenation. */
3281 static void
3282 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3284 /* Digits 0-9.
3285 .....................000000000.111.11111112222222
3286 .....................123456789.012.34567890123456. */
3287 const char *content = (" \"01\" /* non-str */\n"
3288 " \"23\" /* non-str */\n"
3289 " \"45\" /* non-str */\n"
3290 " \"67\" /* non-str */\n"
3291 " \"89\" /* non-str */\n");
3292 lexer_test test (case_, content, NULL);
3294 auto_vec <cpp_string> input_strings;
3295 location_t input_locs[5];
3297 /* Verify that we get the expected tokens back. */
3298 for (int i = 0; i < 5; i++)
3300 const cpp_token *tok = test.get_token ();
3301 ASSERT_EQ (tok->type, CPP_STRING);
3302 input_strings.safe_push (tok->val.str);
3303 input_locs[i] = tok->src_loc;
3306 /* Verify that cpp_interpret_string works. */
3307 cpp_string dst_string;
3308 const enum cpp_ttype type = CPP_STRING;
3309 bool result = cpp_interpret_string (test.m_parser,
3310 input_strings.address (), 5,
3311 &dst_string, type);
3312 ASSERT_TRUE (result);
3313 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3314 free (const_cast <unsigned char *> (dst_string.text));
3316 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3317 test.m_concats.record_string_concatenation (5, input_locs);
3319 location_t initial_loc = input_locs[0];
3321 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3322 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3323 and expect get_source_range_for_substring to fail.
3324 However, for a string concatenation test, we can have a case
3325 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3326 but subsequent strings can be after it.
3327 Attempting to detect this within assert_char_at_range
3328 would overcomplicate the logic for the common test cases, so
3329 we detect it here. */
3330 if (should_have_column_data_p (input_locs[0])
3331 && !should_have_column_data_p (input_locs[4]))
3333 /* Verify that get_source_range_for_substring gracefully rejects
3334 this case. */
3335 source_range actual_range;
3336 const char *err
3337 = get_source_range_for_char (test.m_parser, &test.m_concats,
3338 initial_loc, type, 0, &actual_range);
3339 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3340 return;
3343 for (int i = 0; i < 5; i++)
3344 for (int j = 0; j < 2; j++)
3345 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3346 i + 1, 10 + j, 10 + j);
3348 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3349 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3351 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3354 /* Another test of string literal concatenation, this time combined with
3355 various kinds of escaped characters. */
3357 static void
3358 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3360 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3361 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3362 const char *content
3363 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3364 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3365 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3366 lexer_test test (case_, content, NULL);
3368 auto_vec <cpp_string> input_strings;
3369 location_t input_locs[4];
3371 /* Verify that we get the expected tokens back. */
3372 for (int i = 0; i < 4; i++)
3374 const cpp_token *tok = test.get_token ();
3375 ASSERT_EQ (tok->type, CPP_STRING);
3376 input_strings.safe_push (tok->val.str);
3377 input_locs[i] = tok->src_loc;
3380 /* Verify that cpp_interpret_string works. */
3381 cpp_string dst_string;
3382 const enum cpp_ttype type = CPP_STRING;
3383 bool result = cpp_interpret_string (test.m_parser,
3384 input_strings.address (), 4,
3385 &dst_string, type);
3386 ASSERT_TRUE (result);
3387 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3388 free (const_cast <unsigned char *> (dst_string.text));
3390 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3391 test.m_concats.record_string_concatenation (4, input_locs);
3393 location_t initial_loc = input_locs[0];
3395 for (int i = 0; i <= 4; i++)
3396 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3397 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3398 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3399 for (int i = 7; i <= 9; i++)
3400 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3402 /* NUL-terminator should use the location of the final closing quote. */
3403 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3405 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3408 /* Test of string literal in a macro. */
3410 static void
3411 test_lexer_string_locations_macro (const line_table_case &case_)
3413 /* Digits 0-9.
3414 .....................0000000001111111111.22222222223.
3415 .....................1234567890123456789.01234567890. */
3416 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3417 " MACRO");
3418 lexer_test test (case_, content, NULL);
3420 /* Verify that we get the expected tokens back. */
3421 const cpp_token *tok = test.get_token ();
3422 ASSERT_EQ (tok->type, CPP_PADDING);
3424 tok = test.get_token ();
3425 ASSERT_EQ (tok->type, CPP_STRING);
3426 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3428 /* Verify ranges of individual characters. We ought to
3429 see columns within the macro definition. */
3430 for (int i = 0; i <= 10; i++)
3431 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3432 i, 1, 20 + i, 20 + i);
3434 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3436 tok = test.get_token ();
3437 ASSERT_EQ (tok->type, CPP_PADDING);
3440 /* Test of stringification of a macro argument. */
3442 static void
3443 test_lexer_string_locations_stringified_macro_argument
3444 (const line_table_case &case_)
3446 /* .....................000000000111111111122222222223.
3447 .....................123456789012345678901234567890. */
3448 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3449 "MACRO(foo)\n");
3450 lexer_test test (case_, content, NULL);
3452 /* Verify that we get the expected token back. */
3453 const cpp_token *tok = test.get_token ();
3454 ASSERT_EQ (tok->type, CPP_PADDING);
3456 tok = test.get_token ();
3457 ASSERT_EQ (tok->type, CPP_STRING);
3458 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3460 /* We don't support getting the location of a stringified macro
3461 argument. Verify that it fails gracefully. */
3462 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3463 "cpp_interpret_string_1 failed");
3465 tok = test.get_token ();
3466 ASSERT_EQ (tok->type, CPP_PADDING);
3468 tok = test.get_token ();
3469 ASSERT_EQ (tok->type, CPP_PADDING);
3472 /* Ensure that we are fail gracefully if something attempts to pass
3473 in a location that isn't a string literal token. Seen on this code:
3475 const char a[] = " %d ";
3476 __builtin_printf (a, 0.5);
3479 when c-format.c erroneously used the indicated one-character
3480 location as the format string location, leading to a read past the
3481 end of a string buffer in cpp_interpret_string_1. */
3483 static void
3484 test_lexer_string_locations_non_string (const line_table_case &case_)
3486 /* .....................000000000111111111122222222223.
3487 .....................123456789012345678901234567890. */
3488 const char *content = (" a\n");
3489 lexer_test test (case_, content, NULL);
3491 /* Verify that we get the expected token back. */
3492 const cpp_token *tok = test.get_token ();
3493 ASSERT_EQ (tok->type, CPP_NAME);
3494 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3496 /* At this point, libcpp is attempting to interpret the name as a
3497 string literal, despite it not starting with a quote. We don't detect
3498 that, but we should at least fail gracefully. */
3499 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3500 "cpp_interpret_string_1 failed");
3503 /* Ensure that we can read substring information for a token which
3504 starts in one linemap and ends in another . Adapted from
3505 gcc.dg/cpp/pr69985.c. */
3507 static void
3508 test_lexer_string_locations_long_line (const line_table_case &case_)
3510 /* .....................000000.000111111111
3511 .....................123456.789012346789. */
3512 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3513 " \"0123456789012345678901234567890123456789"
3514 "0123456789012345678901234567890123456789"
3515 "0123456789012345678901234567890123456789"
3516 "0123456789\"\n");
3518 lexer_test test (case_, content, NULL);
3520 /* Verify that we get the expected token back. */
3521 const cpp_token *tok = test.get_token ();
3522 ASSERT_EQ (tok->type, CPP_STRING);
3524 if (!should_have_column_data_p (line_table->highest_location))
3525 return;
3527 /* Verify ranges of individual characters. */
3528 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3529 for (int i = 0; i < 131; i++)
3530 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3531 i, 2, 7 + i, 7 + i);
3534 /* Test of locations within a raw string that doesn't contain a newline. */
3536 static void
3537 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3539 /* .....................00.0000000111111111122.
3540 .....................12.3456789012345678901. */
3541 const char *content = ("R\"foo(0123456789)foo\"\n");
3542 lexer_test test (case_, content, NULL);
3544 /* Verify that we get the expected token back. */
3545 const cpp_token *tok = test.get_token ();
3546 ASSERT_EQ (tok->type, CPP_STRING);
3548 /* Verify that cpp_interpret_string works. */
3549 cpp_string dst_string;
3550 const enum cpp_ttype type = CPP_STRING;
3551 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3552 &dst_string, type);
3553 ASSERT_TRUE (result);
3554 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3555 free (const_cast <unsigned char *> (dst_string.text));
3557 if (!should_have_column_data_p (line_table->highest_location))
3558 return;
3560 /* 0-9, plus the nil terminator. */
3561 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3562 for (int i = 0; i < 11; i++)
3563 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3564 i, 1, 7 + i, 7 + i);
3567 /* Test of locations within a raw string that contains a newline. */
3569 static void
3570 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3572 /* .....................00.0000.
3573 .....................12.3456. */
3574 const char *content = ("R\"foo(\n"
3575 /* .....................00000.
3576 .....................12345. */
3577 "hello\n"
3578 "world\n"
3579 /* .....................00000.
3580 .....................12345. */
3581 ")foo\"\n");
3582 lexer_test test (case_, content, NULL);
3584 /* Verify that we get the expected token back. */
3585 const cpp_token *tok = test.get_token ();
3586 ASSERT_EQ (tok->type, CPP_STRING);
3588 /* Verify that cpp_interpret_string works. */
3589 cpp_string dst_string;
3590 const enum cpp_ttype type = CPP_STRING;
3591 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3592 &dst_string, type);
3593 ASSERT_TRUE (result);
3594 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3595 free (const_cast <unsigned char *> (dst_string.text));
3597 if (!should_have_column_data_p (line_table->highest_location))
3598 return;
3600 /* Currently we don't support locations within raw strings that
3601 contain newlines. */
3602 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3603 "range endpoints are on different lines");
3606 /* Test of parsing an unterminated raw string. */
3608 static void
3609 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3611 const char *content = "R\"ouch()ouCh\" /* etc */";
3613 lexer_diagnostic_sink diagnostics;
3614 lexer_test test (case_, content, &diagnostics);
3615 test.m_implicitly_expect_EOF = false;
3617 /* Attempt to parse the raw string. */
3618 const cpp_token *tok = test.get_token ();
3619 ASSERT_EQ (tok->type, CPP_EOF);
3621 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3622 /* We expect the message "unterminated raw string"
3623 in the "cpplib" translation domain.
3624 It's not clear that dgettext is available on all supported hosts,
3625 so this assertion is commented-out for now.
3626 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3627 diagnostics.m_diagnostics[0]);
3631 /* Test of lexing char constants. */
3633 static void
3634 test_lexer_char_constants (const line_table_case &case_)
3636 /* Various char constants.
3637 .....................0000000001111111111.22222222223.
3638 .....................1234567890123456789.01234567890. */
3639 const char *content = (" 'a'\n"
3640 " u'a'\n"
3641 " U'a'\n"
3642 " L'a'\n"
3643 " 'abc'\n");
3644 lexer_test test (case_, content, NULL);
3646 /* Verify that we get the expected tokens back. */
3647 /* 'a'. */
3648 const cpp_token *tok = test.get_token ();
3649 ASSERT_EQ (tok->type, CPP_CHAR);
3650 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3652 unsigned int chars_seen;
3653 int unsignedp;
3654 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3655 &chars_seen, &unsignedp);
3656 ASSERT_EQ (cc, 'a');
3657 ASSERT_EQ (chars_seen, 1);
3659 /* u'a'. */
3660 tok = test.get_token ();
3661 ASSERT_EQ (tok->type, CPP_CHAR16);
3662 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3664 /* U'a'. */
3665 tok = test.get_token ();
3666 ASSERT_EQ (tok->type, CPP_CHAR32);
3667 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3669 /* L'a'. */
3670 tok = test.get_token ();
3671 ASSERT_EQ (tok->type, CPP_WCHAR);
3672 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3674 /* 'abc' (c-char-sequence). */
3675 tok = test.get_token ();
3676 ASSERT_EQ (tok->type, CPP_CHAR);
3677 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3679 /* A table of interesting location_t values, giving one axis of our test
3680 matrix. */
3682 static const location_t boundary_locations[] = {
3683 /* Zero means "don't override the default values for a new line_table". */
3686 /* An arbitrary non-zero value that isn't close to one of
3687 the boundary values below. */
3688 0x10000,
3690 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3691 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3692 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3693 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3694 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3695 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3697 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3698 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3699 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3700 LINE_MAP_MAX_LOCATION_WITH_COLS,
3701 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3702 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3705 /* Run TESTCASE multiple times, once for each case in our test matrix. */
3707 void
3708 for_each_line_table_case (void (*testcase) (const line_table_case &))
3710 /* As noted above in the description of struct line_table_case,
3711 we want to explore a test matrix of interesting line_table
3712 situations, running various selftests for each case within the
3713 matrix. */
3715 /* Run all tests with:
3716 (a) line_table->default_range_bits == 0, and
3717 (b) line_table->default_range_bits == 5. */
3718 int num_cases_tested = 0;
3719 for (int default_range_bits = 0; default_range_bits <= 5;
3720 default_range_bits += 5)
3722 /* ...and use each of the "interesting" location values as
3723 the starting location within line_table. */
3724 const int num_boundary_locations
3725 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3726 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3728 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3730 testcase (c);
3732 num_cases_tested++;
3736 /* Verify that we fully covered the test matrix. */
3737 ASSERT_EQ (num_cases_tested, 2 * 12);
3740 /* Verify that when presented with a consecutive pair of locations with
3741 a very large line offset, we don't attempt to consolidate them into
3742 a single ordinary linemap where the line offsets within the line map
3743 would lead to overflow (PR lto/88147). */
3745 static void
3746 test_line_offset_overflow ()
3748 line_table_test ltt (line_table_case (5, 0));
3750 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3751 linemap_line_start (line_table, 1, 100);
3752 location_t loc_a = linemap_line_start (line_table, 2578, 255);
3753 assert_loceq ("foo.c", 2578, 0, loc_a);
3755 const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3756 ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3757 ASSERT_EQ (ordmap_a->m_range_bits, 5);
3759 location_t loc_b = linemap_line_start (line_table, 404198, 512);
3760 assert_loceq ("foo.c", 404198, 0, loc_b);
3762 /* We should have started a new linemap, rather than attempting to store
3763 a very large line offset. */
3764 const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3765 ASSERT_NE (ordmap_a, ordmap_b);
3768 void test_cpp_utf8 ()
3770 const int def_tabstop = 8;
3771 cpp_char_column_policy policy (def_tabstop, cpp_wcwidth);
3773 /* Verify that wcwidth of invalid UTF-8 or control bytes is 1. */
3775 int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, policy);
3776 ASSERT_EQ (8, w_bad);
3777 int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, policy);
3778 ASSERT_EQ (5, w_ctrl);
3781 /* Verify that wcwidth of valid UTF-8 is as expected. */
3783 const int w_pi = cpp_display_width ("\xcf\x80", 2, policy);
3784 ASSERT_EQ (1, w_pi);
3785 const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, policy);
3786 ASSERT_EQ (2, w_emoji);
3787 const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
3788 policy);
3789 ASSERT_EQ (1, w_umlaut_precomposed);
3790 const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
3791 policy);
3792 ASSERT_EQ (1, w_umlaut_combining);
3793 const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, policy);
3794 ASSERT_EQ (2, w_han);
3795 const int w_ascii = cpp_display_width ("GCC", 3, policy);
3796 ASSERT_EQ (3, w_ascii);
3797 const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
3798 "\x9f! \xe4\xb8\xba y\xcc\x88",
3799 24, policy);
3800 ASSERT_EQ (18, w_mixed);
3803 /* Verify that display width properly expands tabs. */
3805 const char *tstr = "\tabc\td";
3806 ASSERT_EQ (6, cpp_display_width (tstr, 6,
3807 cpp_char_column_policy (1, cpp_wcwidth)));
3808 ASSERT_EQ (10, cpp_display_width (tstr, 6,
3809 cpp_char_column_policy (3, cpp_wcwidth)));
3810 ASSERT_EQ (17, cpp_display_width (tstr, 6,
3811 cpp_char_column_policy (8, cpp_wcwidth)));
3812 ASSERT_EQ (1,
3813 cpp_display_column_to_byte_column
3814 (tstr, 6, 7, cpp_char_column_policy (8, cpp_wcwidth)));
3817 /* Verify that cpp_byte_column_to_display_column can go past the end,
3818 and similar edge cases. */
3820 const char *str
3821 /* Display columns.
3822 111111112345 */
3823 = "\xcf\x80 abc";
3824 /* 111122223456
3825 Byte columns. */
3827 ASSERT_EQ (5, cpp_display_width (str, 6, policy));
3828 ASSERT_EQ (105,
3829 cpp_byte_column_to_display_column (str, 6, 106, policy));
3830 ASSERT_EQ (10000,
3831 cpp_byte_column_to_display_column (NULL, 0, 10000, policy));
3832 ASSERT_EQ (0,
3833 cpp_byte_column_to_display_column (NULL, 10000, 0, policy));
3836 /* Verify that cpp_display_column_to_byte_column can go past the end,
3837 and similar edge cases, and check invertibility. */
3839 const char *str
3840 /* Display columns.
3841 000000000000000000000000000000000000011
3842 111111112222222234444444455555555678901 */
3843 = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
3844 /* 000000000000000000000000000000000111111
3845 111122223333444456666777788889999012345
3846 Byte columns. */
3847 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, policy));
3848 ASSERT_EQ (15,
3849 cpp_display_column_to_byte_column (str, 15, 11, policy));
3850 ASSERT_EQ (115,
3851 cpp_display_column_to_byte_column (str, 15, 111, policy));
3852 ASSERT_EQ (10000,
3853 cpp_display_column_to_byte_column (NULL, 0, 10000, policy));
3854 ASSERT_EQ (0,
3855 cpp_display_column_to_byte_column (NULL, 10000, 0, policy));
3857 /* Verify that we do not interrupt a UTF-8 sequence. */
3858 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, policy));
3860 for (int byte_col = 1; byte_col <= 15; ++byte_col)
3862 const int disp_col
3863 = cpp_byte_column_to_display_column (str, 15, byte_col, policy);
3864 const int byte_col2
3865 = cpp_display_column_to_byte_column (str, 15, disp_col, policy);
3867 /* If we ask for the display column in the middle of a UTF-8
3868 sequence, it will return the length of the partial sequence,
3869 matching the behavior of GCC before display column support.
3870 Otherwise check the round trip was successful. */
3871 if (byte_col < 4)
3872 ASSERT_EQ (byte_col, disp_col);
3873 else if (byte_col >= 6 && byte_col < 9)
3874 ASSERT_EQ (3 + (byte_col - 5), disp_col);
3875 else
3876 ASSERT_EQ (byte_col2, byte_col);
3882 /* Run all of the selftests within this file. */
3884 void
3885 input_c_tests ()
3887 test_linenum_comparisons ();
3888 test_should_have_column_data_p ();
3889 test_unknown_location ();
3890 test_builtins ();
3891 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3893 for_each_line_table_case (test_accessing_ordinary_linemaps);
3894 for_each_line_table_case (test_lexer);
3895 for_each_line_table_case (test_lexer_string_locations_simple);
3896 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3897 for_each_line_table_case (test_lexer_string_locations_hex);
3898 for_each_line_table_case (test_lexer_string_locations_oct);
3899 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3900 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3901 for_each_line_table_case (test_lexer_string_locations_ucn4);
3902 for_each_line_table_case (test_lexer_string_locations_ucn8);
3903 for_each_line_table_case (test_lexer_string_locations_wide_string);
3904 for_each_line_table_case (test_lexer_string_locations_string16);
3905 for_each_line_table_case (test_lexer_string_locations_string32);
3906 for_each_line_table_case (test_lexer_string_locations_u8);
3907 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3908 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3909 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3910 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3911 for_each_line_table_case (test_lexer_string_locations_macro);
3912 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3913 for_each_line_table_case (test_lexer_string_locations_non_string);
3914 for_each_line_table_case (test_lexer_string_locations_long_line);
3915 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3916 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3917 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3918 for_each_line_table_case (test_lexer_char_constants);
3920 test_reading_source_line ();
3922 test_line_offset_overflow ();
3924 test_cpp_utf8 ();
3927 } // namespace selftest
3929 #endif /* CHECKING_P */