Fix ICE on view conversion between struct and integer
[official-gcc.git] / gcc / input.cc
blob060ca16012686035c2f117c97613a56bf1afebc3
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2022 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic.h"
25 #include "selftest.h"
26 #include "cpplib.h"
28 #ifndef HAVE_ICONV
29 #define HAVE_ICONV 0
30 #endif
/* Input charset configuration.  */

/* Charset callback used when the client supplies none: it returns a
   null charset name for every file.  */
static const char *
default_charset_callback (const char *)
{
  return nullptr;
}
38 void
39 file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
40 bool should_skip_bom)
42 in_context.ccb = (ccb ? ccb : default_charset_callback);
43 in_context.should_skip_bom = should_skip_bom;
46 /* This is a cache used by get_next_line to store the content of a
47 file to be searched for file lines. */
48 class file_cache_slot
50 public:
51 file_cache_slot ();
52 ~file_cache_slot ();
54 bool read_line_num (size_t line_num,
55 char ** line, ssize_t *line_len);
57 /* Accessors. */
58 const char *get_file_path () const { return m_file_path; }
59 unsigned get_use_count () const { return m_use_count; }
60 bool missing_trailing_newline_p () const
62 return m_missing_trailing_newline;
65 void inc_use_count () { m_use_count++; }
67 bool create (const file_cache::input_context &in_context,
68 const char *file_path, FILE *fp, unsigned highest_use_count);
69 void evict ();
71 private:
72 /* These are information used to store a line boundary. */
73 class line_info
75 public:
76 /* The line number. It starts from 1. */
77 size_t line_num;
79 /* The position (byte count) of the beginning of the line,
80 relative to the file data pointer. This starts at zero. */
81 size_t start_pos;
83 /* The position (byte count) of the last byte of the line. This
84 normally points to the '\n' character, or to one byte after the
85 last byte of the file, if the file doesn't contain a '\n'
86 character. */
87 size_t end_pos;
89 line_info (size_t l, size_t s, size_t e)
90 : line_num (l), start_pos (s), end_pos (e)
93 line_info ()
94 :line_num (0), start_pos (0), end_pos (0)
98 bool needs_read_p () const;
99 bool needs_grow_p () const;
100 void maybe_grow ();
101 bool read_data ();
102 bool maybe_read_data ();
103 bool get_next_line (char **line, ssize_t *line_len);
104 bool read_next_line (char ** line, ssize_t *line_len);
105 bool goto_next_line ();
107 static const size_t buffer_size = 4 * 1024;
108 static const size_t line_record_size = 100;
110 /* The number of time this file has been accessed. This is used
111 to designate which file cache to evict from the cache
112 array. */
113 unsigned m_use_count;
115 /* The file_path is the key for identifying a particular file in
116 the cache.
117 For libcpp-using code, the underlying buffer for this field is
118 owned by the corresponding _cpp_file within the cpp_reader. */
119 const char *m_file_path;
121 FILE *m_fp;
123 /* This points to the content of the file that we've read so
124 far. */
125 char *m_data;
127 /* The allocated buffer to be freed may start a little earlier than DATA,
128 e.g. if a UTF8 BOM was skipped at the beginning. */
129 int m_alloc_offset;
131 /* The size of the DATA array above.*/
132 size_t m_size;
134 /* The number of bytes read from the underlying file so far. This
135 must be less (or equal) than SIZE above. */
136 size_t m_nb_read;
138 /* The index of the beginning of the current line. */
139 size_t m_line_start_idx;
141 /* The number of the previous line read. This starts at 1. Zero
142 means we've read no line so far. */
143 size_t m_line_num;
145 /* This is the total number of lines of the current file. At the
146 moment, we try to get this information from the line map
147 subsystem. Note that this is just a hint. When using the C++
148 front-end, this hint is correct because the input file is then
149 completely tokenized before parsing starts; so the line map knows
150 the number of lines before compilation really starts. For e.g,
151 the C front-end, it can happen that we start emitting diagnostics
152 before the line map has seen the end of the file. */
153 size_t m_total_lines;
155 /* Could this file be missing a trailing newline on its final line?
156 Initially true (to cope with empty files), set to true/false
157 as each line is read. */
158 bool m_missing_trailing_newline;
160 /* This is a record of the beginning and end of the lines we've seen
161 while reading the file. This is useful to avoid walking the data
162 from the beginning when we are asked to read a line that is
163 before LINE_START_IDX above. Note that the maximum size of this
164 record is line_record_size, so that the memory consumption
165 doesn't explode. We thus scale total_lines down to
166 line_record_size. */
167 vec<line_info, va_heap> m_line_record;
169 void offset_buffer (int offset)
171 gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
172 : (size_t) offset <= m_size);
173 gcc_assert (m_data);
174 m_alloc_offset += offset;
175 m_data += offset;
176 m_size -= offset;
181 /* Current position in real source file. */
183 location_t input_location = UNKNOWN_LOCATION;
185 class line_maps *line_table;
187 /* A stashed copy of "line_table" for use by selftest::line_table_test.
188 This needs to be a global so that it can be a GC root, and thus
189 prevent the stashed copy from being garbage-collected if the GC runs
190 during a line_table_test. */
192 class line_maps *saved_line_table;
194 /* Expand the source location LOC into a human readable location. If
195 LOC resolves to a builtin location, the file name of the readable
196 location is set to the string "<built-in>". If EXPANSION_POINT_P is
197 TRUE and LOC is virtual, then it is resolved to the expansion
198 point of the involved macro. Otherwise, it is resolved to the
199 spelling location of the token.
201 When resolving to the spelling location of the token, if the
202 resulting location is for a built-in location (that is, it has no
203 associated line/column) in the context of a macro expansion, the
204 returned location is the first one (while unwinding the macro
205 location towards its expansion point) that is in real source
206 code.
208 ASPECT controls which part of the location to use. */
210 static expanded_location
211 expand_location_1 (location_t loc,
212 bool expansion_point_p,
213 enum location_aspect aspect)
215 expanded_location xloc;
216 const line_map_ordinary *map;
217 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
218 tree block = NULL;
220 if (IS_ADHOC_LOC (loc))
222 block = LOCATION_BLOCK (loc);
223 loc = LOCATION_LOCUS (loc);
226 memset (&xloc, 0, sizeof (xloc));
228 if (loc >= RESERVED_LOCATION_COUNT)
230 if (!expansion_point_p)
232 /* We want to resolve LOC to its spelling location.
234 But if that spelling location is a reserved location that
235 appears in the context of a macro expansion (like for a
236 location for a built-in token), let's consider the first
237 location (toward the expansion point) that is not reserved;
238 that is, the first location that is in real source code. */
239 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
240 loc, NULL);
241 lrk = LRK_SPELLING_LOCATION;
243 loc = linemap_resolve_location (line_table, loc, lrk, &map);
245 /* loc is now either in an ordinary map, or is a reserved location.
246 If it is a compound location, the caret is in a spelling location,
247 but the start/finish might still be a virtual location.
248 Depending of what the caller asked for, we may need to recurse
249 one level in order to resolve any virtual locations in the
250 end-points. */
251 switch (aspect)
253 default:
254 gcc_unreachable ();
255 /* Fall through. */
256 case LOCATION_ASPECT_CARET:
257 break;
258 case LOCATION_ASPECT_START:
260 location_t start = get_start (loc);
261 if (start != loc)
262 return expand_location_1 (start, expansion_point_p, aspect);
264 break;
265 case LOCATION_ASPECT_FINISH:
267 location_t finish = get_finish (loc);
268 if (finish != loc)
269 return expand_location_1 (finish, expansion_point_p, aspect);
271 break;
273 xloc = linemap_expand_location (line_table, map, loc);
276 xloc.data = block;
277 if (loc <= BUILTINS_LOCATION)
278 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
280 return xloc;
283 /* Initialize the set of cache used for files accessed by caret
284 diagnostic. */
286 static void
287 diagnostic_file_cache_init (void)
289 gcc_assert (global_dc);
290 if (global_dc->m_file_cache == NULL)
291 global_dc->m_file_cache = new file_cache ();
294 /* Free the resources used by the set of cache used for files accessed
295 by caret diagnostic. */
297 void
298 diagnostic_file_cache_fini (void)
300 if (global_dc->m_file_cache)
302 delete global_dc->m_file_cache;
303 global_dc->m_file_cache = NULL;
307 /* Return the total lines number that have been read so far by the
308 line map (in the preprocessor) so far. For languages like C++ that
309 entirely preprocess the input file before starting to parse, this
310 equals the actual number of lines of the file. */
312 static size_t
313 total_lines_num (const char *file_path)
315 size_t r = 0;
316 location_t l = 0;
317 if (linemap_get_file_highest_location (line_table, file_path, &l))
319 gcc_assert (l >= RESERVED_LOCATION_COUNT);
320 expanded_location xloc = expand_location (l);
321 r = xloc.line;
323 return r;
326 /* Lookup the cache used for the content of a given file accessed by
327 caret diagnostic. Return the found cached file, or NULL if no
328 cached file was found. */
330 file_cache_slot *
331 file_cache::lookup_file (const char *file_path)
333 gcc_assert (file_path);
335 /* This will contain the found cached file. */
336 file_cache_slot *r = NULL;
337 for (unsigned i = 0; i < num_file_slots; ++i)
339 file_cache_slot *c = &m_file_slots[i];
340 if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
342 c->inc_use_count ();
343 r = c;
347 if (r)
348 r->inc_use_count ();
350 return r;
353 /* Purge any mention of FILENAME from the cache of files used for
354 printing source code. For use in selftests when working
355 with tempfiles. */
357 void
358 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
360 gcc_assert (file_path);
362 if (!global_dc->m_file_cache)
363 return;
365 global_dc->m_file_cache->forcibly_evict_file (file_path);
368 void
369 file_cache::forcibly_evict_file (const char *file_path)
371 gcc_assert (file_path);
373 file_cache_slot *r = lookup_file (file_path);
374 if (!r)
375 /* Not found. */
376 return;
378 r->evict ();
381 void
382 file_cache_slot::evict ()
384 m_file_path = NULL;
385 if (m_fp)
386 fclose (m_fp);
387 m_fp = NULL;
388 m_nb_read = 0;
389 m_line_start_idx = 0;
390 m_line_num = 0;
391 m_line_record.truncate (0);
392 m_use_count = 0;
393 m_total_lines = 0;
394 m_missing_trailing_newline = true;
397 /* Return the file cache that has been less used, recently, or the
398 first empty one. If HIGHEST_USE_COUNT is non-null,
399 *HIGHEST_USE_COUNT is set to the highest use count of the entries
400 in the cache table. */
402 file_cache_slot*
403 file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
405 diagnostic_file_cache_init ();
407 file_cache_slot *to_evict = &m_file_slots[0];
408 unsigned huc = to_evict->get_use_count ();
409 for (unsigned i = 1; i < num_file_slots; ++i)
411 file_cache_slot *c = &m_file_slots[i];
412 bool c_is_empty = (c->get_file_path () == NULL);
414 if (c->get_use_count () < to_evict->get_use_count ()
415 || (to_evict->get_file_path () && c_is_empty))
416 /* We evict C because it's either an entry with a lower use
417 count or one that is empty. */
418 to_evict = c;
420 if (huc < c->get_use_count ())
421 huc = c->get_use_count ();
423 if (c_is_empty)
424 /* We've reached the end of the cache; subsequent elements are
425 all empty. */
426 break;
429 if (highest_use_count)
430 *highest_use_count = huc;
432 return to_evict;
435 /* Create the cache used for the content of a given file to be
436 accessed by caret diagnostic. This cache is added to an array of
437 cache and can be retrieved by lookup_file_in_cache_tab. This
438 function returns the created cache. Note that only the last
439 num_file_slots files are cached. */
441 file_cache_slot*
442 file_cache::add_file (const char *file_path)
445 FILE *fp = fopen (file_path, "r");
446 if (fp == NULL)
447 return NULL;
449 unsigned highest_use_count = 0;
450 file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
451 if (!r->create (in_context, file_path, fp, highest_use_count))
452 return NULL;
453 return r;
456 /* Populate this slot for use on FILE_PATH and FP, dropping any
457 existing cached content within it. */
459 bool
460 file_cache_slot::create (const file_cache::input_context &in_context,
461 const char *file_path, FILE *fp,
462 unsigned highest_use_count)
464 m_file_path = file_path;
465 if (m_fp)
466 fclose (m_fp);
467 m_fp = fp;
468 if (m_alloc_offset)
469 offset_buffer (-m_alloc_offset);
470 m_nb_read = 0;
471 m_line_start_idx = 0;
472 m_line_num = 0;
473 m_line_record.truncate (0);
474 /* Ensure that this cache entry doesn't get evicted next time
475 add_file_to_cache_tab is called. */
476 m_use_count = ++highest_use_count;
477 m_total_lines = total_lines_num (file_path);
478 m_missing_trailing_newline = true;
481 /* Check the input configuration to determine if we need to do any
482 transformations, such as charset conversion or BOM skipping. */
483 if (const char *input_charset = in_context.ccb (file_path))
485 /* Need a full-blown conversion of the input charset. */
486 fclose (m_fp);
487 m_fp = NULL;
488 const cpp_converted_source cs
489 = cpp_get_converted_source (file_path, input_charset);
490 if (!cs.data)
491 return false;
492 if (m_data)
493 XDELETEVEC (m_data);
494 m_data = cs.data;
495 m_nb_read = m_size = cs.len;
496 m_alloc_offset = cs.data - cs.to_free;
498 else if (in_context.should_skip_bom)
500 if (read_data ())
502 const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
503 offset_buffer (offset);
504 m_nb_read -= offset;
508 return true;
511 /* file_cache's ctor. */
513 file_cache::file_cache ()
514 : m_file_slots (new file_cache_slot[num_file_slots])
516 initialize_input_context (nullptr, false);
519 /* file_cache's dtor. */
521 file_cache::~file_cache ()
523 delete[] m_file_slots;
526 /* Lookup the cache used for the content of a given file accessed by
527 caret diagnostic. If no cached file was found, create a new cache
528 for this file, add it to the array of cached file and return
529 it. */
531 file_cache_slot*
532 file_cache::lookup_or_add_file (const char *file_path)
534 file_cache_slot *r = lookup_file (file_path);
535 if (r == NULL)
536 r = add_file (file_path);
537 return r;
540 /* Default constructor for a cache of file used by caret
541 diagnostic. */
543 file_cache_slot::file_cache_slot ()
544 : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
545 m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
546 m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true)
548 m_line_record.create (0);
551 /* Destructor for a cache of file used by caret diagnostic. */
553 file_cache_slot::~file_cache_slot ()
555 if (m_fp)
557 fclose (m_fp);
558 m_fp = NULL;
560 if (m_data)
562 offset_buffer (-m_alloc_offset);
563 XDELETEVEC (m_data);
564 m_data = 0;
566 m_line_record.release ();
569 /* Returns TRUE iff the cache would need to be filled with data coming
570 from the file. That is, either the cache is empty or full or the
571 current line is empty. Note that if the cache is full, it would
572 need to be extended and filled again. */
574 bool
575 file_cache_slot::needs_read_p () const
577 return m_fp && (m_nb_read == 0
578 || m_nb_read == m_size
579 || (m_line_start_idx >= m_nb_read - 1));
582 /* Return TRUE iff the cache is full and thus needs to be
583 extended. */
585 bool
586 file_cache_slot::needs_grow_p () const
588 return m_nb_read == m_size;
591 /* Grow the cache if it needs to be extended. */
593 void
594 file_cache_slot::maybe_grow ()
596 if (!needs_grow_p ())
597 return;
599 if (!m_data)
601 gcc_assert (m_size == 0 && m_alloc_offset == 0);
602 m_size = buffer_size;
603 m_data = XNEWVEC (char, m_size);
605 else
607 const int offset = m_alloc_offset;
608 offset_buffer (-offset);
609 m_size *= 2;
610 m_data = XRESIZEVEC (char, m_data, m_size);
611 offset_buffer (offset);
615 /* Read more data into the cache. Extends the cache if need be.
616 Returns TRUE iff new data could be read. */
618 bool
619 file_cache_slot::read_data ()
621 if (feof (m_fp) || ferror (m_fp))
622 return false;
624 maybe_grow ();
626 char * from = m_data + m_nb_read;
627 size_t to_read = m_size - m_nb_read;
628 size_t nb_read = fread (from, 1, to_read, m_fp);
630 if (ferror (m_fp))
631 return false;
633 m_nb_read += nb_read;
634 return !!nb_read;
637 /* Read new data iff the cache needs to be filled with more data
638 coming from the file FP. Return TRUE iff the cache was filled with
639 mode data. */
641 bool
642 file_cache_slot::maybe_read_data ()
644 if (!needs_read_p ())
645 return false;
646 return read_data ();
/* Helper function for file_cache_slot::get_next_line (), to find the
   end of the next line in the LEN bytes starting at S.  Follows the
   memchr convention: the result points at the last byte of the line
   terminator, or is nullptr if no terminator was found.  Line endings
   are determined the same way libcpp does it: any of \n, \r\n, or \r
   ends a line.  */

static char *
find_end_of_line (char *s, size_t len)
{
  char *const limit = s + len;
  for (char *p = s; p != limit; ++p)
    {
      if (*p == '\n')
        return p;
      if (*p != '\r')
        continue;

      /* A \r that is the very last byte of the buffer is not reported:
         we cannot tell whether it ends the file or merely ends what
         has been read so far, and we must not split what is really a
         \r\n sequence.  A file ending in \r is dealt with by the
         caller.  */
      if (p + 1 == limit)
        return nullptr;

      return p[1] == '\n' ? p + 1 : p;
    }
  return nullptr;
}
680 /* Read a new line from file FP, using C as a cache for the data
681 coming from the file. Upon successful completion, *LINE is set to
682 the beginning of the line found. *LINE points directly in the
683 line cache and is only valid until the next call of get_next_line.
684 *LINE_LEN is set to the length of the line. Note that the line
685 does not contain any terminal delimiter. This function returns
686 true if some data was read or process from the cache, false
687 otherwise. Note that subsequent calls to get_next_line might
688 make the content of *LINE invalid. */
690 bool
691 file_cache_slot::get_next_line (char **line, ssize_t *line_len)
693 /* Fill the cache with data to process. */
694 maybe_read_data ();
696 size_t remaining_size = m_nb_read - m_line_start_idx;
697 if (remaining_size == 0)
698 /* There is no more data to process. */
699 return false;
701 char *line_start = m_data + m_line_start_idx;
703 char *next_line_start = NULL;
704 size_t len = 0;
705 char *line_end = find_end_of_line (line_start, remaining_size);
706 if (line_end == NULL)
708 /* We haven't found an end-of-line delimiter in the cache.
709 Fill the cache with more data from the file and look again. */
710 while (maybe_read_data ())
712 line_start = m_data + m_line_start_idx;
713 remaining_size = m_nb_read - m_line_start_idx;
714 line_end = find_end_of_line (line_start, remaining_size);
715 if (line_end != NULL)
717 next_line_start = line_end + 1;
718 break;
721 if (line_end == NULL)
723 /* We've loaded all the file into the cache and still no
724 terminator. Let's say the line ends up at one byte past the
725 end of the file. This is to stay consistent with the case
726 of when the line ends up with a terminator and line_end points to
727 that. That consistency is useful below in the len calculation.
729 If the file ends in a \r, we didn't identify it as a line
730 terminator above, so do that now instead. */
731 line_end = m_data + m_nb_read;
732 if (m_nb_read && line_end[-1] == '\r')
734 --line_end;
735 m_missing_trailing_newline = false;
737 else
738 m_missing_trailing_newline = true;
740 else
741 m_missing_trailing_newline = false;
743 else
745 next_line_start = line_end + 1;
746 m_missing_trailing_newline = false;
749 if (m_fp && ferror (m_fp))
750 return false;
752 /* At this point, we've found the end of the of line. It either points to
753 the line terminator or to one byte after the last byte of the file. */
754 gcc_assert (line_end != NULL);
756 len = line_end - line_start;
758 if (m_line_start_idx < m_nb_read)
759 *line = line_start;
761 ++m_line_num;
763 /* Before we update our line record, make sure the hint about the
764 total number of lines of the file is correct. If it's not, then
765 we give up recording line boundaries from now on. */
766 bool update_line_record = true;
767 if (m_line_num > m_total_lines)
768 update_line_record = false;
770 /* Now update our line record so that re-reading lines from the
771 before m_line_start_idx is faster. */
772 if (update_line_record
773 && m_line_record.length () < line_record_size)
775 /* If the file lines fits in the line record, we just record all
776 its lines ...*/
777 if (m_total_lines <= line_record_size
778 && m_line_num > m_line_record.length ())
779 m_line_record.safe_push
780 (file_cache_slot::line_info (m_line_num,
781 m_line_start_idx,
782 line_end - m_data));
783 else if (m_total_lines > line_record_size)
785 /* ... otherwise, we just scale total_lines down to
786 (line_record_size lines. */
787 size_t n = (m_line_num * line_record_size) / m_total_lines;
788 if (m_line_record.length () == 0
789 || n >= m_line_record.length ())
790 m_line_record.safe_push
791 (file_cache_slot::line_info (m_line_num,
792 m_line_start_idx,
793 line_end - m_data));
797 /* Update m_line_start_idx so that it points to the next line to be
798 read. */
799 if (next_line_start)
800 m_line_start_idx = next_line_start - m_data;
801 else
802 /* We didn't find any terminal '\n'. Let's consider that the end
803 of line is the end of the data in the cache. The next
804 invocation of get_next_line will either read more data from the
805 underlying file or return false early because we've reached the
806 end of the file. */
807 m_line_start_idx = m_nb_read;
809 *line_len = len;
811 return true;
814 /* Consume the next bytes coming from the cache (or from its
815 underlying file if there are remaining unread bytes in the file)
816 until we reach the next end-of-line (or end-of-file). There is no
817 copying from the cache involved. Return TRUE upon successful
818 completion. */
820 bool
821 file_cache_slot::goto_next_line ()
823 char *l;
824 ssize_t len;
826 return get_next_line (&l, &len);
829 /* Read an arbitrary line number LINE_NUM from the file cached in C.
830 If the line was read successfully, *LINE points to the beginning
831 of the line in the file cache and *LINE_LEN is the length of the
832 line. *LINE is not nul-terminated, but may contain zero bytes.
833 *LINE is only valid until the next call of read_line_num.
834 This function returns bool if a line was read. */
836 bool
837 file_cache_slot::read_line_num (size_t line_num,
838 char ** line, ssize_t *line_len)
840 gcc_assert (line_num > 0);
842 if (line_num <= m_line_num)
844 /* We've been asked to read lines that are before m_line_num.
845 So lets use our line record (if it's not empty) to try to
846 avoid re-reading the file from the beginning again. */
848 if (m_line_record.is_empty ())
850 m_line_start_idx = 0;
851 m_line_num = 0;
853 else
855 file_cache_slot::line_info *i = NULL;
856 if (m_total_lines <= line_record_size)
858 /* In languages where the input file is not totally
859 preprocessed up front, the m_total_lines hint
860 can be smaller than the number of lines of the
861 file. In that case, only the first
862 m_total_lines have been recorded.
864 Otherwise, the first m_total_lines we've read have
865 their start/end recorded here. */
866 i = (line_num <= m_total_lines)
867 ? &m_line_record[line_num - 1]
868 : &m_line_record[m_total_lines - 1];
869 gcc_assert (i->line_num <= line_num);
871 else
873 /* So the file had more lines than our line record
874 size. Thus the number of lines we've recorded has
875 been scaled down to line_record_size. Let's
876 pick the start/end of the recorded line that is
877 closest to line_num. */
878 size_t n = (line_num <= m_total_lines)
879 ? line_num * line_record_size / m_total_lines
880 : m_line_record.length () - 1;
881 if (n < m_line_record.length ())
883 i = &m_line_record[n];
884 gcc_assert (i->line_num <= line_num);
888 if (i && i->line_num == line_num)
890 /* We have the start/end of the line. */
891 *line = m_data + i->start_pos;
892 *line_len = i->end_pos - i->start_pos;
893 return true;
896 if (i)
898 m_line_start_idx = i->start_pos;
899 m_line_num = i->line_num - 1;
901 else
903 m_line_start_idx = 0;
904 m_line_num = 0;
909 /* Let's walk from line m_line_num up to line_num - 1, without
910 copying any line. */
911 while (m_line_num < line_num - 1)
912 if (!goto_next_line ())
913 return false;
915 /* The line we want is the next one. Let's read and copy it back to
916 the caller. */
917 return get_next_line (line, line_len);
920 /* Return the physical source line that corresponds to FILE_PATH/LINE.
921 The line is not nul-terminated. The returned pointer is only
922 valid until the next call of location_get_source_line.
923 Note that the line can contain several null characters,
924 so the returned value's length has the actual length of the line.
925 If the function fails, a NULL char_span is returned. */
927 char_span
928 location_get_source_line (const char *file_path, int line)
930 char *buffer = NULL;
931 ssize_t len;
933 if (line == 0)
934 return char_span (NULL, 0);
936 if (file_path == NULL)
937 return char_span (NULL, 0);
939 diagnostic_file_cache_init ();
941 file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
942 if (c == NULL)
943 return char_span (NULL, 0);
945 bool read = c->read_line_num (line, &buffer, &len);
946 if (!read)
947 return char_span (NULL, 0);
949 return char_span (buffer, len);
952 /* Determine if FILE_PATH missing a trailing newline on its final line.
953 Only valid to call once all of the file has been loaded, by
954 requesting a line number beyond the end of the file. */
956 bool
957 location_missing_trailing_newline (const char *file_path)
959 diagnostic_file_cache_init ();
961 file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
962 if (c == NULL)
963 return false;
965 return c->missing_trailing_newline_p ();
968 /* Test if the location originates from the spelling location of a
969 builtin-tokens. That is, return TRUE if LOC is a (possibly
970 virtual) location of a built-in token that appears in the expansion
971 list of a macro. Please note that this function also works on
972 tokens that result from built-in tokens. For instance, the
973 function would return true if passed a token "4" that is the result
974 of the expansion of the built-in __LINE__ macro. */
975 bool
976 is_location_from_builtin_token (location_t loc)
978 const line_map_ordinary *map = NULL;
979 loc = linemap_resolve_location (line_table, loc,
980 LRK_SPELLING_LOCATION, &map);
981 return loc == BUILTINS_LOCATION;
984 /* Expand the source location LOC into a human readable location. If
985 LOC is virtual, it resolves to the expansion point of the involved
986 macro. If LOC resolves to a builtin location, the file name of the
987 readable location is set to the string "<built-in>". */
989 expanded_location
990 expand_location (location_t loc)
992 return expand_location_1 (loc, /*expansion_point_p=*/true,
993 LOCATION_ASPECT_CARET);
996 /* Expand the source location LOC into a human readable location. If
997 LOC is virtual, it resolves to the expansion location of the
998 relevant macro. If LOC resolves to a builtin location, the file
999 name of the readable location is set to the string
1000 "<built-in>". */
1002 expanded_location
1003 expand_location_to_spelling_point (location_t loc,
1004 enum location_aspect aspect)
1006 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
1009 /* The rich_location class within libcpp requires a way to expand
1010 location_t instances, and relies on the client code
1011 providing a symbol named
1012 linemap_client_expand_location_to_spelling_point
1013 to do this.
1015 This is the implementation for libcommon.a (all host binaries),
1016 which simply calls into expand_location_1. */
1018 expanded_location
1019 linemap_client_expand_location_to_spelling_point (location_t loc,
1020 enum location_aspect aspect)
1022 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
1026 /* If LOCATION is in a system header and if it is a virtual location
1027 for a token coming from the expansion of a macro, unwind it to
1028 the location of the expansion point of the macro. If the expansion
1029 point is also in a system header return the original LOCATION.
1030 Otherwise, return the location of the expansion point.
1032 This is used for instance when we want to emit diagnostics about a
1033 token that may be located in a macro that is itself defined in a
1034 system header, for example, for the NULL macro. In such a case, if
1035 LOCATION were passed directly to diagnostic functions such as
1036 warning_at, the diagnostic would be suppressed (unless
1037 -Wsystem-headers). */
1039 location_t
1040 expansion_point_location_if_in_system_header (location_t location)
1042 if (!in_system_header_at (location))
1043 return location;
1045 location_t xloc = linemap_resolve_location (line_table, location,
1046 LRK_MACRO_EXPANSION_POINT,
1047 NULL);
1048 return in_system_header_at (xloc) ? location : xloc;
1051 /* If LOCATION is a virtual location for a token coming from the expansion
1052 of a macro, unwind to the location of the expansion point of the macro. */
1054 location_t
1055 expansion_point_location (location_t location)
1057 return linemap_resolve_location (line_table, location,
1058 LRK_MACRO_EXPANSION_POINT, NULL);
1061 /* Construct a location with caret at CARET, ranging from START to
1062 finish e.g.
1064 11111111112
1065 12345678901234567890
1067 523 return foo + bar;
1068 ~~~~^~~~~
1071 The location's caret is at the "+", line 523 column 15, but starts
1072 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
1073 of "bar" at column 19. */
1075 location_t
1076 make_location (location_t caret, location_t start, location_t finish)
1078 location_t pure_loc = get_pure_location (caret);
1079 source_range src_range;
1080 src_range.m_start = get_start (start);
1081 src_range.m_finish = get_finish (finish);
1082 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
1083 pure_loc,
1084 src_range,
1085 NULL);
1086 return combined_loc;
1089 /* Same as above, but taking a source range rather than two locations. */
1091 location_t
1092 make_location (location_t caret, source_range src_range)
1094 location_t pure_loc = get_pure_location (caret);
1095 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
1098 /* An expanded_location stores the column in byte units. This function
1099 converts that column to display units. That requires reading the associated
1100 source line in order to calculate the display width. If that cannot be done
1101 for any reason, then returns the byte column as a fallback. */
1103 location_compute_display_column (expanded_location exploc,
1104 const cpp_char_column_policy &policy)
1106 if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
1107 return exploc.column;
1108 char_span line = location_get_source_line (exploc.file, exploc.line);
1109 /* If line is NULL, this function returns exploc.column which is the
1110 desired fallback. */
1111 return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
1112 exploc.column, policy);
1115 /* Dump statistics to stderr about the memory usage of the line_table
1116 set of line maps. This also displays some statistics about macro
1117 expansion. */
1119 void
1120 dump_line_table_statistics (void)
1122 struct linemap_stats s;
1123 long total_used_map_size,
1124 macro_maps_size,
1125 total_allocated_map_size;
1127 memset (&s, 0, sizeof (s));
1129 linemap_get_statistics (line_table, &s);
1131 macro_maps_size = s.macro_maps_used_size
1132 + s.macro_maps_locations_size;
1134 total_allocated_map_size = s.ordinary_maps_allocated_size
1135 + s.macro_maps_allocated_size
1136 + s.macro_maps_locations_size;
1138 total_used_map_size = s.ordinary_maps_used_size
1139 + s.macro_maps_used_size
1140 + s.macro_maps_locations_size;
1142 fprintf (stderr, "Number of expanded macros: %5ld\n",
1143 s.num_expanded_macros);
1144 if (s.num_expanded_macros != 0)
1145 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
1146 s.num_macro_tokens / s.num_expanded_macros);
1147 fprintf (stderr,
1148 "\nLine Table allocations during the "
1149 "compilation process\n");
1150 fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
1151 SIZE_AMOUNT (s.num_ordinary_maps_used));
1152 fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
1153 SIZE_AMOUNT (s.ordinary_maps_used_size));
1154 fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
1155 SIZE_AMOUNT (s.num_ordinary_maps_allocated));
1156 fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
1157 SIZE_AMOUNT (s.ordinary_maps_allocated_size));
1158 fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
1159 SIZE_AMOUNT (s.num_macro_maps_used));
1160 fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
1161 SIZE_AMOUNT (s.macro_maps_used_size));
1162 fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
1163 SIZE_AMOUNT (s.macro_maps_locations_size));
1164 fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
1165 SIZE_AMOUNT (macro_maps_size));
1166 fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
1167 SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
1168 fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
1169 SIZE_AMOUNT (total_allocated_map_size));
1170 fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
1171 SIZE_AMOUNT (total_used_map_size));
1172 fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
1173 SIZE_AMOUNT (s.adhoc_table_size));
1174 fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
1175 SIZE_AMOUNT (s.adhoc_table_entries_used));
1176 fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
1177 SIZE_AMOUNT (line_table->num_optimized_ranges));
1178 fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
1179 SIZE_AMOUNT (line_table->num_unoptimized_ranges));
1181 fprintf (stderr, "\n");
1184 /* Get location one beyond the final location in ordinary map IDX. */
1186 static location_t
1187 get_end_location (class line_maps *set, unsigned int idx)
1189 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1190 return set->highest_location;
1192 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1193 return MAP_START_LOCATION (next_map);
/* Helper function for write_digit_row.
   Write the least significant decimal digit of DIGIT to STREAM as a
   single character.  The sign of DIGIT is ignored, so that a negative
   value still produces a character in the range '0'..'9'.  */

static void
write_digit (FILE *stream, int digit)
{
  /* '%' truncates towards zero, so the remainder of a negative value
     lies in -9..0; fold it back into 0..9 before forming the
     character.  */
  int units = digit % 10;
  if (units < 0)
    units = -units;
  fputc ('0' + units, stream);
}
1204 /* Helper function for dump_location_info.
1205 Write a row of numbers to STREAM, numbering a source line,
1206 giving the units, tens, hundreds etc of the column number. */
1208 static void
1209 write_digit_row (FILE *stream, int indent,
1210 const line_map_ordinary *map,
1211 location_t loc, int max_col, int divisor)
1213 fprintf (stream, "%*c", indent, ' ');
1214 fprintf (stream, "|");
1215 for (int column = 1; column < max_col; column++)
1217 location_t column_loc = loc + (column << map->m_range_bits);
1218 write_digit (stream, column_loc / divisor);
1220 fprintf (stream, "\n");
1223 /* Write a half-closed (START) / half-open (END) interval of
1224 location_t to STREAM. */
1226 static void
1227 dump_location_range (FILE *stream,
1228 location_t start, location_t end)
1230 fprintf (stream,
1231 " location_t interval: %u <= loc < %u\n",
1232 start, end);
1235 /* Write a labelled description of a half-closed (START) / half-open (END)
1236 interval of location_t to STREAM. */
1238 static void
1239 dump_labelled_location_range (FILE *stream,
1240 const char *name,
1241 location_t start, location_t end)
1243 fprintf (stream, "%s\n", name);
1244 dump_location_range (stream, start, end);
1245 fprintf (stream, "\n");
1248 /* Write a visualization of the locations in the line_table to STREAM. */
1250 void
1251 dump_location_info (FILE *stream)
1253 /* Visualize the reserved locations. */
1254 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1255 0, RESERVED_LOCATION_COUNT);
1257 /* Visualize the ordinary line_map instances, rendering the sources. */
1258 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1260 location_t end_location = get_end_location (line_table, idx);
1261 /* half-closed: doesn't include this one. */
1263 const line_map_ordinary *map
1264 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1265 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1266 dump_location_range (stream,
1267 MAP_START_LOCATION (map), end_location);
1268 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1269 fprintf (stream, " starting at line: %i\n",
1270 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1271 fprintf (stream, " column and range bits: %i\n",
1272 map->m_column_and_range_bits);
1273 fprintf (stream, " column bits: %i\n",
1274 map->m_column_and_range_bits - map->m_range_bits);
1275 fprintf (stream, " range bits: %i\n",
1276 map->m_range_bits);
1277 const char * reason;
1278 switch (map->reason) {
1279 case LC_ENTER:
1280 reason = "LC_ENTER";
1281 break;
1282 case LC_LEAVE:
1283 reason = "LC_LEAVE";
1284 break;
1285 case LC_RENAME:
1286 reason = "LC_RENAME";
1287 break;
1288 case LC_RENAME_VERBATIM:
1289 reason = "LC_RENAME_VERBATIM";
1290 break;
1291 case LC_ENTER_MACRO:
1292 reason = "LC_RENAME_MACRO";
1293 break;
1294 default:
1295 reason = "Unknown";
1297 fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
1299 const line_map_ordinary *includer_map
1300 = linemap_included_from_linemap (line_table, map);
1301 fprintf (stream, " included from location: %d",
1302 linemap_included_from (map));
1303 if (includer_map) {
1304 fprintf (stream, " (in ordinary map %d)",
1305 int (includer_map - line_table->info_ordinary.maps));
1307 fprintf (stream, "\n");
1309 /* Render the span of source lines that this "map" covers. */
1310 for (location_t loc = MAP_START_LOCATION (map);
1311 loc < end_location;
1312 loc += (1 << map->m_range_bits) )
1314 gcc_assert (pure_location_p (line_table, loc) );
1316 expanded_location exploc
1317 = linemap_expand_location (line_table, map, loc);
1319 if (exploc.column == 0)
1321 /* Beginning of a new source line: draw the line. */
1323 char_span line_text = location_get_source_line (exploc.file,
1324 exploc.line);
1325 if (!line_text)
1326 break;
1327 fprintf (stream,
1328 "%s:%3i|loc:%5i|%.*s\n",
1329 exploc.file, exploc.line,
1330 loc,
1331 (int)line_text.length (), line_text.get_buffer ());
1333 /* "loc" is at column 0, which means "the whole line".
1334 Render the locations *within* the line, by underlining
1335 it, showing the location_t numeric values
1336 at each column. */
1337 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1338 if (max_col > line_text.length ())
1339 max_col = line_text.length () + 1;
1341 int len_lnum = num_digits (exploc.line);
1342 if (len_lnum < 3)
1343 len_lnum = 3;
1344 int len_loc = num_digits (loc);
1345 if (len_loc < 5)
1346 len_loc = 5;
1348 int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1350 /* Thousands. */
1351 if (end_location > 999)
1352 write_digit_row (stream, indent, map, loc, max_col, 1000);
1354 /* Hundreds. */
1355 if (end_location > 99)
1356 write_digit_row (stream, indent, map, loc, max_col, 100);
1358 /* Tens. */
1359 write_digit_row (stream, indent, map, loc, max_col, 10);
1361 /* Units. */
1362 write_digit_row (stream, indent, map, loc, max_col, 1);
1365 fprintf (stream, "\n");
1368 /* Visualize unallocated values. */
1369 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1370 line_table->highest_location,
1371 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1373 /* Visualize the macro line_map instances, rendering the sources. */
1374 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1376 /* Each macro map that is allocated owns location_t values
1377 that are *lower* that the one before them.
1378 Hence it's meaningful to view them either in order of ascending
1379 source locations, or in order of ascending macro map index. */
1380 const bool ascending_location_ts = true;
1381 unsigned int idx = (ascending_location_ts
1382 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1383 : i);
1384 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1385 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1386 idx,
1387 linemap_map_get_macro_name (map),
1388 MACRO_MAP_NUM_MACRO_TOKENS (map));
1389 dump_location_range (stream,
1390 map->start_location,
1391 (map->start_location
1392 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1393 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1394 "expansion point is location %i",
1395 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1396 fprintf (stream, " map->start_location: %u\n",
1397 map->start_location);
1399 fprintf (stream, " macro_locations:\n");
1400 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1402 location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1403 location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1405 /* linemap_add_macro_token encodes token numbers in an expansion
1406 by putting them after MAP_START_LOCATION. */
1408 /* I'm typically seeing 4 uninitialized entries at the end of
1409 0xafafafaf.
1410 This appears to be due to macro.cc:replace_args
1411 adding 2 extra args for padding tokens; presumably there may
1412 be a leading and/or trailing padding token injected,
1413 each for 2 more location slots.
1414 This would explain there being up to 4 location_ts slots
1415 that may be uninitialized. */
1417 fprintf (stream, " %u: %u, %u\n",
1421 if (x == y)
1423 if (x < MAP_START_LOCATION (map))
1424 inform (x, "token %u has %<x-location == y-location == %u%>",
1425 i, x);
1426 else
1427 fprintf (stream,
1428 "x-location == y-location == %u encodes token # %u\n",
1429 x, x - MAP_START_LOCATION (map));
1431 else
1433 inform (x, "token %u has %<x-location == %u%>", i, x);
1434 inform (x, "token %u has %<y-location == %u%>", i, y);
1437 fprintf (stream, "\n");
1440 /* It appears that MAX_LOCATION_T itself is never assigned to a
1441 macro map, presumably due to an off-by-one error somewhere
1442 between the logic in linemap_enter_macro and
1443 LINEMAPS_MACRO_LOWEST_LOCATION. */
1444 dump_labelled_location_range (stream, "MAX_LOCATION_T",
1445 MAX_LOCATION_T,
1446 MAX_LOCATION_T + 1);
1448 /* Visualize ad-hoc values. */
1449 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1450 MAX_LOCATION_T + 1, UINT_MAX);
1453 /* string_concat's constructor. */
1455 string_concat::string_concat (int num, location_t *locs)
1456 : m_num (num)
1458 m_locs = ggc_vec_alloc <location_t> (num);
1459 for (int i = 0; i < num; i++)
1460 m_locs[i] = locs[i];
1463 /* string_concat_db's constructor. */
1465 string_concat_db::string_concat_db ()
1467 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1470 /* Record that a string concatenation occurred, covering NUM
1471 string literal tokens. LOCS is an array of size NUM, containing the
1472 locations of the tokens. A copy of LOCS is taken. */
1474 void
1475 string_concat_db::record_string_concatenation (int num, location_t *locs)
1477 gcc_assert (num > 1);
1478 gcc_assert (locs);
1480 location_t key_loc = get_key_loc (locs[0]);
1481 /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:
1482 any data now recorded under key 'key_loc' would be overwritten by a
1483 subsequent call with the same key 'key_loc'. */
1484 if (RESERVED_LOCATION_P (key_loc))
1485 return;
1487 string_concat *concat
1488 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1489 m_table->put (key_loc, concat);
1492 /* Determine if LOC was the location of the initial token of a
1493 concatenation of string literal tokens.
1494 If so, *OUT_NUM is written to with the number of tokens, and
1495 *OUT_LOCS with the location of an array of locations of the
1496 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1497 storage owned by the string_concat_db.
1498 Otherwise, return false. */
1500 bool
1501 string_concat_db::get_string_concatenation (location_t loc,
1502 int *out_num,
1503 location_t **out_locs)
1505 gcc_assert (out_num);
1506 gcc_assert (out_locs);
1508 location_t key_loc = get_key_loc (loc);
1509 /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see
1510 discussion in 'string_concat_db::record_string_concatenation'. */
1511 if (RESERVED_LOCATION_P (key_loc))
1512 return false;
1514 string_concat **concat = m_table->get (key_loc);
1515 if (!concat)
1516 return false;
1518 *out_num = (*concat)->m_num;
1519 *out_locs =(*concat)->m_locs;
1520 return true;
1523 /* Internal function. Canonicalize LOC into a form suitable for
1524 use as a key within the database, stripping away macro expansion,
1525 ad-hoc information, and range information, using the location of
1526 the start of LOC within an ordinary linemap. */
1528 location_t
1529 string_concat_db::get_key_loc (location_t loc)
1531 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1532 NULL);
1534 loc = get_range_from_loc (line_table, loc).m_start;
1536 return loc;
1539 /* Helper class for use within get_substring_ranges_for_loc.
1540 An vec of cpp_string with responsibility for releasing all of the
1541 str->text for each str in the vector. */
1543 class auto_cpp_string_vec : public auto_vec <cpp_string>
1545 public:
1546 auto_cpp_string_vec (int alloc)
1547 : auto_vec <cpp_string> (alloc) {}
1549 ~auto_cpp_string_vec ()
1551 /* Clean up the copies within this vec. */
1552 int i;
1553 cpp_string *str;
1554 FOR_EACH_VEC_ELT (*this, i, str)
1555 free (const_cast <unsigned char *> (str->text));
1559 /* Attempt to populate RANGES with source location information on the
1560 individual characters within the string literal found at STRLOC.
1561 If CONCATS is non-NULL, then any string literals that the token at
1562 STRLOC was concatenated with are also added to RANGES.
1564 Return NULL if successful, or an error message if any errors occurred (in
1565 which case RANGES may be only partially populated and should not
1566 be used).
1568 This is implemented by re-parsing the relevant source line(s). */
1570 static const char *
1571 get_substring_ranges_for_loc (cpp_reader *pfile,
1572 string_concat_db *concats,
1573 location_t strloc,
1574 enum cpp_ttype type,
1575 cpp_substring_ranges &ranges)
1577 gcc_assert (pfile);
1579 if (strloc == UNKNOWN_LOCATION)
1580 return "unknown location";
1582 /* Reparsing the strings requires accurate location information.
1583 If -ftrack-macro-expansion has been overridden from its default
1584 of 2, then we might have a location of a macro expansion point,
1585 rather than the location of the literal itself.
1586 Avoid this by requiring that we have full macro expansion tracking
1587 for substring locations to be available. */
1588 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1589 return "track_macro_expansion != 2";
1591 /* If #line or # 44 "file"-style directives are present, then there's
1592 no guarantee that the line numbers we have can be used to locate
1593 the strings. For example, we might have a .i file with # directives
1594 pointing back to lines within a .c file, but the .c file might
1595 have been edited since the .i file was created.
1596 In such a case, the safest course is to disable on-demand substring
1597 locations. */
1598 if (line_table->seen_line_directive)
1599 return "seen line directive";
1601 /* If string concatenation has occurred at STRLOC, get the locations
1602 of all of the literal tokens making up the compound string.
1603 Otherwise, just use STRLOC. */
1604 int num_locs = 1;
1605 location_t *strlocs = &strloc;
1606 if (concats)
1607 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1609 auto_cpp_string_vec strs (num_locs);
1610 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1611 for (int i = 0; i < num_locs; i++)
1613 /* Get range of strloc. We will use it to locate the start and finish
1614 of the literal token within the line. */
1615 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1617 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1619 /* If the string token was within a macro expansion, then we can
1620 cope with it for the simple case where we have a single token.
1621 Otherwise, bail out. */
1622 if (src_range.m_start != src_range.m_finish)
1623 return "macro expansion";
1625 else
1627 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1628 /* If so, we can't reliably determine where the token started within
1629 its line. */
1630 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1632 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1633 /* If so, we can't reliably determine where the token finished
1634 within its line. */
1635 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1638 expanded_location start
1639 = expand_location_to_spelling_point (src_range.m_start,
1640 LOCATION_ASPECT_START);
1641 expanded_location finish
1642 = expand_location_to_spelling_point (src_range.m_finish,
1643 LOCATION_ASPECT_FINISH);
1644 if (start.file != finish.file)
1645 return "range endpoints are in different files";
1646 if (start.line != finish.line)
1647 return "range endpoints are on different lines";
1648 if (start.column > finish.column)
1649 return "range endpoints are reversed";
1651 char_span line = location_get_source_line (start.file, start.line);
1652 if (!line)
1653 return "unable to read source line";
1655 /* Determine the location of the literal (including quotes
1656 and leading prefix chars, such as the 'u' in a u""
1657 token). */
1658 size_t literal_length = finish.column - start.column + 1;
1660 /* Ensure that we don't crash if we got the wrong location. */
1661 if (start.column < 1)
1662 return "zero start column";
1663 if (line.length () < (start.column - 1 + literal_length))
1664 return "line is not wide enough";
1666 char_span literal = line.subspan (start.column - 1, literal_length);
1668 cpp_string from;
1669 from.len = literal_length;
1670 /* Make a copy of the literal, to avoid having to rely on
1671 the lifetime of the copy of the line within the cache.
1672 This will be released by the auto_cpp_string_vec dtor. */
1673 from.text = (unsigned char *)literal.xstrdup ();
1674 strs.safe_push (from);
1676 /* For very long lines, a new linemap could have started
1677 halfway through the token.
1678 Ensure that the loc_reader uses the linemap of the
1679 *end* of the token for its start location. */
1680 const line_map_ordinary *start_ord_map;
1681 linemap_resolve_location (line_table, src_range.m_start,
1682 LRK_SPELLING_LOCATION, &start_ord_map);
1683 const line_map_ordinary *final_ord_map;
1684 linemap_resolve_location (line_table, src_range.m_finish,
1685 LRK_SPELLING_LOCATION, &final_ord_map);
1686 if (start_ord_map == NULL || final_ord_map == NULL)
1687 return "failed to get ordinary maps";
1688 /* Bulletproofing. We ought to only have different ordinary maps
1689 for start vs finish due to line-length jumps. */
1690 if (start_ord_map != final_ord_map
1691 && start_ord_map->to_file != final_ord_map->to_file)
1692 return "start and finish are spelled in different ordinary maps";
1693 /* The file from linemap_resolve_location ought to match that from
1694 expand_location_to_spelling_point. */
1695 if (start_ord_map->to_file != start.file)
1696 return "mismatching file after resolving linemap";
1698 location_t start_loc
1699 = linemap_position_for_line_and_column (line_table, final_ord_map,
1700 start.line, start.column);
1702 cpp_string_location_reader loc_reader (start_loc, line_table);
1703 loc_readers.safe_push (loc_reader);
1706 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1707 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1708 loc_readers.address (),
1709 num_locs, &ranges, type);
1710 if (err)
1711 return err;
1713 /* Success: "ranges" should now contain information on the string. */
1714 return NULL;
1717 /* Attempt to populate *OUT_LOC with source location information on the
1718 given characters within the string literal found at STRLOC.
1719 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1720 character set.
1722 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1723 and string literal "012345\n789"
1724 *OUT_LOC is written to with:
1725 "012345\n789"
1726 ~^~~~~
1728 If CONCATS is non-NULL, then any string literals that the token at
1729 STRLOC was concatenated with are also considered.
1731 This is implemented by re-parsing the relevant source line(s).
1733 Return NULL if successful, or an error message if any errors occurred.
1734 Error messages are intended for GCC developers (to help debugging) rather
1735 than for end-users. */
1737 const char *
1738 get_location_within_string (cpp_reader *pfile,
1739 string_concat_db *concats,
1740 location_t strloc,
1741 enum cpp_ttype type,
1742 int caret_idx, int start_idx, int end_idx,
1743 location_t *out_loc)
1745 gcc_checking_assert (caret_idx >= 0);
1746 gcc_checking_assert (start_idx >= 0);
1747 gcc_checking_assert (end_idx >= 0);
1748 gcc_assert (out_loc);
1750 cpp_substring_ranges ranges;
1751 const char *err
1752 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1753 if (err)
1754 return err;
1756 if (caret_idx >= ranges.get_num_ranges ())
1757 return "caret_idx out of range";
1758 if (start_idx >= ranges.get_num_ranges ())
1759 return "start_idx out of range";
1760 if (end_idx >= ranges.get_num_ranges ())
1761 return "end_idx out of range";
1763 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1764 ranges.get_range (start_idx).m_start,
1765 ranges.get_range (end_idx).m_finish);
1766 return NULL;
1769 #if CHECKING_P
1771 namespace selftest {
1773 /* Selftests of location handling. */
1775 /* Attempt to populate *OUT_RANGE with source location information on the
1776 given character within the string literal found at STRLOC.
1777 CHAR_IDX refers to an offset within the execution character set.
1778 If CONCATS is non-NULL, then any string literals that the token at
1779 STRLOC was concatenated with are also considered.
1781 This is implemented by re-parsing the relevant source line(s).
1783 Return NULL if successful, or an error message if any errors occurred.
1784 Error messages are intended for GCC developers (to help debugging) rather
1785 than for end-users. */
1787 static const char *
1788 get_source_range_for_char (cpp_reader *pfile,
1789 string_concat_db *concats,
1790 location_t strloc,
1791 enum cpp_ttype type,
1792 int char_idx,
1793 source_range *out_range)
1795 gcc_checking_assert (char_idx >= 0);
1796 gcc_assert (out_range);
1798 cpp_substring_ranges ranges;
1799 const char *err
1800 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1801 if (err)
1802 return err;
1804 if (char_idx >= ranges.get_num_ranges ())
1805 return "char_idx out of range";
1807 *out_range = ranges.get_range (char_idx);
1808 return NULL;
1811 /* As get_source_range_for_char, but write to *OUT the number
1812 of ranges that are available. */
1814 static const char *
1815 get_num_source_ranges_for_substring (cpp_reader *pfile,
1816 string_concat_db *concats,
1817 location_t strloc,
1818 enum cpp_ttype type,
1819 int *out)
1821 gcc_assert (out);
1823 cpp_substring_ranges ranges;
1824 const char *err
1825 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1827 if (err)
1828 return err;
1830 *out = ranges.get_num_ranges ();
1831 return NULL;
1834 /* Selftests of location handling. */
1836 /* Verify that compare() on linenum_type handles comparisons over the full
1837 range of the type. */
1839 static void
1840 test_linenum_comparisons ()
1842 linenum_type min_line (0);
1843 linenum_type max_line (0xffffffff);
1844 ASSERT_EQ (0, compare (min_line, min_line));
1845 ASSERT_EQ (0, compare (max_line, max_line));
1847 ASSERT_GT (compare (max_line, min_line), 0);
1848 ASSERT_LT (compare (min_line, max_line), 0);
1851 /* Helper function for verifying location data: when location_t
1852 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1853 as having column 0. */
1855 static bool
1856 should_have_column_data_p (location_t loc)
1858 if (IS_ADHOC_LOC (loc))
1859 loc = get_location_from_adhoc_loc (line_table, loc);
1860 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1861 return false;
1862 return true;
1865 /* Selftest for should_have_column_data_p. */
1867 static void
1868 test_should_have_column_data_p ()
1870 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1871 ASSERT_TRUE
1872 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1873 ASSERT_FALSE
1874 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1877 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1878 on LOC. */
1880 static void
1881 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1882 location_t loc)
1884 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1885 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1886 /* If location_t values are sufficiently high, then column numbers
1887 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1888 When close to the threshold, column numbers *may* be present: if
1889 the final linemap before the threshold contains a line that straddles
1890 the threshold, locations in that line have column information. */
1891 if (should_have_column_data_p (loc))
1892 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests in a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.cc: there are various threshold
     values for location_t beyond which line-map.cc changes
     behavior (disabling of the range-packing optimization, disabling
     of column-tracking).  We can exercise these by starting the
     line_table at interesting values at or near these thresholds.

   The following class describes one particular case within our test
   matrix.  */

class line_table_case
{
public:
  line_table_case (int default_range_bits, int base_location)
  {
    m_default_range_bits = default_range_bits;
    m_base_location = base_location;
  }

  /* Value to use for the default_range_bits of the line table under
     test.  */
  int m_default_range_bits;

  /* Location at which the line table under test should start.  */
  int m_base_location;
};
1923 /* Constructor. Store the old value of line_table, and create a new
1924 one, using sane defaults. */
1926 line_table_test::line_table_test ()
1928 gcc_assert (saved_line_table == NULL);
1929 saved_line_table = line_table;
1930 line_table = ggc_alloc<line_maps> ();
1931 linemap_init (line_table, BUILTINS_LOCATION);
1932 gcc_assert (saved_line_table->reallocator);
1933 line_table->reallocator = saved_line_table->reallocator;
1934 gcc_assert (saved_line_table->round_alloc_size);
1935 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1936 line_table->default_range_bits = 0;
1939 /* Constructor. Store the old value of line_table, and create a new
1940 one, using the sitation described in CASE_. */
1942 line_table_test::line_table_test (const line_table_case &case_)
1944 gcc_assert (saved_line_table == NULL);
1945 saved_line_table = line_table;
1946 line_table = ggc_alloc<line_maps> ();
1947 linemap_init (line_table, BUILTINS_LOCATION);
1948 gcc_assert (saved_line_table->reallocator);
1949 line_table->reallocator = saved_line_table->reallocator;
1950 gcc_assert (saved_line_table->round_alloc_size);
1951 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1952 line_table->default_range_bits = case_.m_default_range_bits;
1953 if (case_.m_base_location)
1955 line_table->highest_location = case_.m_base_location;
1956 line_table->highest_line = case_.m_base_location;
1960 /* Destructor. Restore the old value of line_table. */
1962 line_table_test::~line_table_test ()
1964 gcc_assert (saved_line_table != NULL);
1965 line_table = saved_line_table;
1966 saved_line_table = NULL;
1969 /* Verify basic operation of ordinary linemaps. */
1971 static void
1972 test_accessing_ordinary_linemaps (const line_table_case &case_)
1974 line_table_test ltt (case_);
1976 /* Build a simple linemap describing some locations. */
1977 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1979 linemap_line_start (line_table, 1, 100);
1980 location_t loc_a = linemap_position_for_column (line_table, 1);
1981 location_t loc_b = linemap_position_for_column (line_table, 23);
1983 linemap_line_start (line_table, 2, 100);
1984 location_t loc_c = linemap_position_for_column (line_table, 1);
1985 location_t loc_d = linemap_position_for_column (line_table, 17);
1987 /* Example of a very long line. */
1988 linemap_line_start (line_table, 3, 2000);
1989 location_t loc_e = linemap_position_for_column (line_table, 700);
1991 /* Transitioning back to a short line. */
1992 linemap_line_start (line_table, 4, 0);
1993 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1995 if (should_have_column_data_p (loc_back_to_short))
1997 /* Verify that we switched to short lines in the linemap. */
1998 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1999 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
2002 /* Example of a line that will eventually be seen to be longer
2003 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
2004 below that. */
2005 linemap_line_start (line_table, 5, 2000);
2007 location_t loc_start_of_very_long_line
2008 = linemap_position_for_column (line_table, 2000);
2009 location_t loc_too_wide
2010 = linemap_position_for_column (line_table, 4097);
2011 location_t loc_too_wide_2
2012 = linemap_position_for_column (line_table, 4098);
2014 /* ...and back to a sane line length. */
2015 linemap_line_start (line_table, 6, 100);
2016 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
2018 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
2020 /* Multiple files. */
2021 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
2022 linemap_line_start (line_table, 1, 200);
2023 location_t loc_f = linemap_position_for_column (line_table, 150);
2024 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
2026 /* Verify that we can recover the location info. */
2027 assert_loceq ("foo.c", 1, 1, loc_a);
2028 assert_loceq ("foo.c", 1, 23, loc_b);
2029 assert_loceq ("foo.c", 2, 1, loc_c);
2030 assert_loceq ("foo.c", 2, 17, loc_d);
2031 assert_loceq ("foo.c", 3, 700, loc_e);
2032 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
2034 /* In the very wide line, the initial location should be fully tracked. */
2035 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
2036 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
2037 be disabled. */
2038 assert_loceq ("foo.c", 5, 0, loc_too_wide);
2039 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
2040 /*...and column-tracking should be re-enabled for subsequent lines. */
2041 assert_loceq ("foo.c", 6, 10, loc_sane_again);
2043 assert_loceq ("bar.c", 1, 150, loc_f);
2045 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
2046 ASSERT_TRUE (pure_location_p (line_table, loc_a));
2048 /* Verify using make_location to build a range, and extracting data
2049 back from it. */
2050 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
2051 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
2052 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
2053 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
2054 ASSERT_EQ (loc_b, src_range.m_start);
2055 ASSERT_EQ (loc_d, src_range.m_finish);
2058 /* Verify various properties of UNKNOWN_LOCATION. */
2060 static void
2061 test_unknown_location ()
2063 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
2064 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
2065 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
2068 /* Verify various properties of BUILTINS_LOCATION. */
2070 static void
2071 test_builtins ()
2073 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
2074 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
2077 /* Regression test for make_location.
2078 Ensure that we use pure locations for the start/finish of the range,
2079 rather than storing a packed or ad-hoc range as the start/finish. */
2081 static void
2082 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
2084 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
2085 with C++ frontend.
2086 ....................0000000001111111111222.
2087 ....................1234567890123456789012. */
2088 const char *content = " r += !aaa == bbb;\n";
2089 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
2090 line_table_test ltt (case_);
2091 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
2093 const location_t c11 = linemap_position_for_column (line_table, 11);
2094 const location_t c12 = linemap_position_for_column (line_table, 12);
2095 const location_t c13 = linemap_position_for_column (line_table, 13);
2096 const location_t c14 = linemap_position_for_column (line_table, 14);
2097 const location_t c21 = linemap_position_for_column (line_table, 21);
2099 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
2100 return;
2102 /* Use column 13 for the caret location, arbitrarily, to verify that we
2103 handle start != caret. */
2104 const location_t aaa = make_location (c13, c12, c14);
2105 ASSERT_EQ (c13, get_pure_location (aaa));
2106 ASSERT_EQ (c12, get_start (aaa));
2107 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
2108 ASSERT_EQ (c14, get_finish (aaa));
2109 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
2111 /* Make a location using a location with a range as the start-point. */
2112 const location_t not_aaa = make_location (c11, aaa, c14);
2113 ASSERT_EQ (c11, get_pure_location (not_aaa));
2114 /* It should use the start location of the range, not store the range
2115 itself. */
2116 ASSERT_EQ (c12, get_start (not_aaa));
2117 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
2118 ASSERT_EQ (c14, get_finish (not_aaa));
2119 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
2121 /* Similarly, make a location with a range as the end-point. */
2122 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
2123 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
2124 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
2125 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
2126 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
2127 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
2128 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
2129 /* It should use the finish location of the range, not store the range
2130 itself. */
2131 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
2132 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
2133 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
2134 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
2135 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
2138 /* Verify reading of input files (e.g. for caret-based diagnostics). */
2140 static void
2141 test_reading_source_line ()
2143 /* Create a tempfile and write some text to it. */
2144 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
2145 "01234567890123456789\n"
2146 "This is the test text\n"
2147 "This is the 3rd line");
2149 /* Read back a specific line from the tempfile. */
2150 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
2151 ASSERT_TRUE (source_line);
2152 ASSERT_TRUE (source_line.get_buffer () != NULL);
2153 ASSERT_EQ (20, source_line.length ());
2154 ASSERT_TRUE (!strncmp ("This is the 3rd line",
2155 source_line.get_buffer (), source_line.length ()));
2157 source_line = location_get_source_line (tmp.get_filename (), 2);
2158 ASSERT_TRUE (source_line);
2159 ASSERT_TRUE (source_line.get_buffer () != NULL);
2160 ASSERT_EQ (21, source_line.length ());
2161 ASSERT_TRUE (!strncmp ("This is the test text",
2162 source_line.get_buffer (), source_line.length ()));
2164 source_line = location_get_source_line (tmp.get_filename (), 4);
2165 ASSERT_FALSE (source_line);
2166 ASSERT_TRUE (source_line.get_buffer () == NULL);
2169 /* Tests of lexing. */
/* Assert that cpp_token_as_text for token TOK of PARSER yields a string
   equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *tok_text_ = cpp_token_as_text ((PARSER), (TOK));     \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)tok_text_);            \
  SELFTEST_END_STMT
2180 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
2181 and ranges from EXP_START_COL to EXP_FINISH_COL.
2182 Use LOC as the effective location of the selftest. */
2184 static void
2185 assert_token_loc_eq (const location &loc,
2186 const cpp_token *tok,
2187 const char *exp_filename, int exp_linenum,
2188 int exp_start_col, int exp_finish_col)
2190 location_t tok_loc = tok->src_loc;
2191 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
2192 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
2194 /* If location_t values are sufficiently high, then column numbers
2195 will be unavailable. */
2196 if (!should_have_column_data_p (tok_loc))
2197 return;
2199 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2200 source_range tok_range = get_range_from_loc (line_table, tok_loc);
2201 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2202 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
/* Wrapper around assert_token_loc_eq for checking TOK->src_loc, which
   passes SELFTEST_LOCATION as the effective location of the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,     \
                            EXP_START_COL, EXP_FINISH_COL)      \
  assert_token_loc_eq (SELFTEST_LOCATION,                       \
                       (TOK),                                   \
                       (EXP_FILENAME),                          \
                       (EXP_LINENUM),                           \
                       (EXP_START_COL),                         \
                       (EXP_FINISH_COL))
2213 /* Test of lexing a file using libcpp, verifying tokens and their
2214 location information. */
2216 static void
2217 test_lexer (const line_table_case &case_)
2219 /* Create a tempfile and write some text to it. */
2220 const char *content =
2221 /*00000000011111111112222222222333333.3333444444444.455555555556
2222 12345678901234567890123456789012345.6789012345678.901234567890. */
2223 ("test_name /* c-style comment */\n"
2224 " \"test literal\"\n"
2225 " // test c++-style comment\n"
2226 " 42\n");
2227 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2229 line_table_test ltt (case_);
2231 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2233 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2234 ASSERT_NE (fname, NULL);
2236 /* Verify that we get the expected tokens back, with the correct
2237 location information. */
2239 location_t loc;
2240 const cpp_token *tok;
2241 tok = cpp_get_token_with_location (parser, &loc);
2242 ASSERT_NE (tok, NULL);
2243 ASSERT_EQ (tok->type, CPP_NAME);
2244 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2245 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2247 tok = cpp_get_token_with_location (parser, &loc);
2248 ASSERT_NE (tok, NULL);
2249 ASSERT_EQ (tok->type, CPP_STRING);
2250 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2251 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2253 tok = cpp_get_token_with_location (parser, &loc);
2254 ASSERT_NE (tok, NULL);
2255 ASSERT_EQ (tok->type, CPP_NUMBER);
2256 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2257 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2259 tok = cpp_get_token_with_location (parser, &loc);
2260 ASSERT_NE (tok, NULL);
2261 ASSERT_EQ (tok->type, CPP_EOF);
2263 cpp_finish (parser, NULL);
2264 cpp_destroy (parser);
/* Forward decls.  */

class lexer_test;
class lexer_test_options;

/* A class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor.  */

class lexer_test_options
{
public:
  /* Virtual so that destroying a subclass through a pointer to this
     base class is well-defined.  */
  virtual ~lexer_test_options () {}

  /* Hook for subclasses to configure the given lexer_test.  */
  virtual void apply (lexer_test &) = 0;
};
2281 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2282 in its dtor.
2284 This is needed by struct lexer_test to ensure that the cleanup of the
2285 cpp_reader happens *after* the cleanup of the temp_source_file. */
2287 class cpp_reader_ptr
2289 public:
2290 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2292 ~cpp_reader_ptr ()
2294 cpp_finish (m_ptr, NULL);
2295 cpp_destroy (m_ptr);
2298 operator cpp_reader * () const { return m_ptr; }
2300 private:
2301 cpp_reader *m_ptr;
2304 /* A struct for writing lexer tests. */
2306 class lexer_test
2308 public:
2309 lexer_test (const line_table_case &case_, const char *content,
2310 lexer_test_options *options);
2311 ~lexer_test ();
2313 const cpp_token *get_token ();
2315 /* The ordering of these fields matters.
2316 The line_table_test must be first, since the cpp_reader_ptr
2317 uses it.
2318 The cpp_reader must be cleaned up *after* the temp_source_file
2319 since the filenames in input.cc's input cache are owned by the
2320 cpp_reader; in particular, when ~temp_source_file evicts the
2321 filename the filenames must still be alive. */
2322 line_table_test m_ltt;
2323 cpp_reader_ptr m_parser;
2324 temp_source_file m_tempfile;
2325 string_concat_db m_concats;
2326 bool m_implicitly_expect_EOF;
2329 /* Use an EBCDIC encoding for the execution charset, specifically
2330 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2332 This exercises iconv integration within libcpp.
2333 Not every build of iconv supports the given charset,
2334 so we need to flag this error and handle it gracefully. */
2336 class ebcdic_execution_charset : public lexer_test_options
2338 public:
2339 ebcdic_execution_charset () : m_num_iconv_errors (0)
2341 gcc_assert (s_singleton == NULL);
2342 s_singleton = this;
2344 ~ebcdic_execution_charset ()
2346 gcc_assert (s_singleton == this);
2347 s_singleton = NULL;
2350 void apply (lexer_test &test) final override
2352 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2353 cpp_opts->narrow_charset = "IBM1047";
2355 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2356 callbacks->diagnostic = on_diagnostic;
2359 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2360 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2361 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2362 rich_location *richloc ATTRIBUTE_UNUSED,
2363 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2364 ATTRIBUTE_FPTR_PRINTF(5,0)
2366 gcc_assert (s_singleton);
2367 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2368 const char *msg = "conversion from %s to %s not supported by iconv";
2369 #ifdef ENABLE_NLS
2370 msg = dgettext ("cpplib", msg);
2371 #endif
2372 /* Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc
2373 when the local iconv build doesn't support the conversion. */
2374 if (strcmp (msgid, msg) == 0)
2376 s_singleton->m_num_iconv_errors++;
2377 return true;
2380 /* Otherwise, we have an unexpected error. */
2381 abort ();
2384 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2386 private:
2387 static ebcdic_execution_charset *s_singleton;
2388 int m_num_iconv_errors;
2391 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2393 /* A lexer_test_options subclass that records a list of diagnostic
2394 messages emitted by the lexer. */
2396 class lexer_diagnostic_sink : public lexer_test_options
2398 public:
2399 lexer_diagnostic_sink ()
2401 gcc_assert (s_singleton == NULL);
2402 s_singleton = this;
2404 ~lexer_diagnostic_sink ()
2406 gcc_assert (s_singleton == this);
2407 s_singleton = NULL;
2409 int i;
2410 char *str;
2411 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2412 free (str);
2415 void apply (lexer_test &test) final override
2417 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2418 callbacks->diagnostic = on_diagnostic;
2421 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2422 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2423 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2424 rich_location *richloc ATTRIBUTE_UNUSED,
2425 const char *msgid, va_list *ap)
2426 ATTRIBUTE_FPTR_PRINTF(5,0)
2428 char *msg = xvasprintf (msgid, *ap);
2429 s_singleton->m_diagnostics.safe_push (msg);
2430 return true;
2433 auto_vec<char *> m_diagnostics;
2435 private:
2436 static lexer_diagnostic_sink *s_singleton;
2439 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2441 /* Constructor. Override line_table with a new instance based on CASE_,
2442 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2443 start parsing the tempfile. */
2445 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2446 lexer_test_options *options)
2447 : m_ltt (case_),
2448 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2449 /* Create a tempfile and write the text to it. */
2450 m_tempfile (SELFTEST_LOCATION, ".c", content),
2451 m_concats (),
2452 m_implicitly_expect_EOF (true)
2454 if (options)
2455 options->apply (*this);
2457 cpp_init_iconv (m_parser);
2459 /* Parse the file. */
2460 const char *fname = cpp_read_main_file (m_parser,
2461 m_tempfile.get_filename ());
2462 ASSERT_NE (fname, NULL);
2465 /* Destructor. By default, verify that the next token in m_parser is EOF. */
2467 lexer_test::~lexer_test ()
2469 location_t loc;
2470 const cpp_token *tok;
2472 if (m_implicitly_expect_EOF)
2474 tok = cpp_get_token_with_location (m_parser, &loc);
2475 ASSERT_NE (tok, NULL);
2476 ASSERT_EQ (tok->type, CPP_EOF);
2480 /* Get the next token from m_parser. */
2482 const cpp_token *
2483 lexer_test::get_token ()
2485 location_t loc;
2486 const cpp_token *tok;
2488 tok = cpp_get_token_with_location (m_parser, &loc);
2489 ASSERT_NE (tok, NULL);
2490 return tok;
2493 /* Verify that locations within string literals are correctly handled. */
2495 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2496 using the string concatenation database for TEST.
2498 Assert that the character at index IDX is on EXPECTED_LINE,
2499 and that it begins at column EXPECTED_START_COL and ends at
2500 EXPECTED_FINISH_COL (unless the locations are beyond
2501 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2502 columns). */
2504 static void
2505 assert_char_at_range (const location &loc,
2506 lexer_test& test,
2507 location_t strloc, enum cpp_ttype type, int idx,
2508 int expected_line, int expected_start_col,
2509 int expected_finish_col)
2511 cpp_reader *pfile = test.m_parser;
2512 string_concat_db *concats = &test.m_concats;
2514 source_range actual_range = source_range();
2515 const char *err
2516 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2517 &actual_range);
2518 if (should_have_column_data_p (strloc))
2519 ASSERT_EQ_AT (loc, NULL, err);
2520 else
2522 ASSERT_STREQ_AT (loc,
2523 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2524 err);
2525 return;
2528 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2529 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2530 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2531 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2533 if (should_have_column_data_p (actual_range.m_start))
2535 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2536 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2538 if (should_have_column_data_p (actual_range.m_finish))
2540 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2541 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
/* Wrapper around assert_char_at_range which passes SELFTEST_LOCATION
   as the effective location of any errors.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
                             EXPECTED_START_COL, EXPECTED_FINISH_COL)      \
  assert_char_at_range (SELFTEST_LOCATION,                                 \
                        (LEXER_TEST), (STRLOC), (TYPE), (IDX),             \
                        (EXPECTED_LINE),                                   \
                        (EXPECTED_START_COL), (EXPECTED_FINISH_COL))
2554 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2555 using the string concatenation database for TEST.
2557 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2559 static void
2560 assert_num_substring_ranges (const location &loc,
2561 lexer_test& test,
2562 location_t strloc,
2563 enum cpp_ttype type,
2564 int expected_num_ranges)
2566 cpp_reader *pfile = test.m_parser;
2567 string_concat_db *concats = &test.m_concats;
2569 int actual_num_ranges = -1;
2570 const char *err
2571 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2572 &actual_num_ranges);
2573 if (should_have_column_data_p (strloc))
2574 ASSERT_EQ_AT (loc, NULL, err);
2575 else
2577 ASSERT_STREQ_AT (loc,
2578 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2579 err);
2580 return;
2582 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
/* Wrapper around assert_num_substring_ranges which passes
   SELFTEST_LOCATION as the effective location of any errors.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,   \
                                    EXPECTED_NUM_RANGES)        \
  assert_num_substring_ranges (SELFTEST_LOCATION,               \
                               (LEXER_TEST), (STRLOC), (TYPE),  \
                               (EXPECTED_NUM_RANGES))
2594 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2595 returns an error (using the string concatenation database for TEST). */
2597 static void
2598 assert_has_no_substring_ranges (const location &loc,
2599 lexer_test& test,
2600 location_t strloc,
2601 enum cpp_ttype type,
2602 const char *expected_err)
2604 cpp_reader *pfile = test.m_parser;
2605 string_concat_db *concats = &test.m_concats;
2606 cpp_substring_ranges ranges;
2607 const char *actual_err
2608 = get_substring_ranges_for_loc (pfile, concats, strloc,
2609 type, ranges);
2610 if (should_have_column_data_p (strloc))
2611 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2612 else
2613 ASSERT_STREQ_AT (loc,
2614 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2615 actual_err);
/* Wrapper around assert_has_no_substring_ranges which passes
   SELFTEST_LOCATION as the effective location of any errors.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)   \
  assert_has_no_substring_ranges (SELFTEST_LOCATION,                    \
                                  (LEXER_TEST), (STRLOC), (TYPE),       \
                                  (ERR))
2622 /* Lex a simple string literal. Verify the substring location data, before
2623 and after running cpp_interpret_string on it. */
2625 static void
2626 test_lexer_string_locations_simple (const line_table_case &case_)
2628 /* Digits 0-9 (with 0 at column 10), the simple way.
2629 ....................000000000.11111111112.2222222223333333333
2630 ....................123456789.01234567890.1234567890123456789
2631 We add a trailing comment to ensure that we correctly locate
2632 the end of the string literal token. */
2633 const char *content = " \"0123456789\" /* not a string */\n";
2634 lexer_test test (case_, content, NULL);
2636 /* Verify that we get the expected token back, with the correct
2637 location information. */
2638 const cpp_token *tok = test.get_token ();
2639 ASSERT_EQ (tok->type, CPP_STRING);
2640 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2641 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2643 /* At this point in lexing, the quote characters are treated as part of
2644 the string (they are stripped off by cpp_interpret_string). */
2646 ASSERT_EQ (tok->val.str.len, 12);
2648 /* Verify that cpp_interpret_string works. */
2649 cpp_string dst_string;
2650 const enum cpp_ttype type = CPP_STRING;
2651 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2652 &dst_string, type);
2653 ASSERT_TRUE (result);
2654 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2655 free (const_cast <unsigned char *> (dst_string.text));
2657 /* Verify ranges of individual characters. This no longer includes the
2658 opening quote, but does include the closing quote. */
2659 for (int i = 0; i <= 10; i++)
2660 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2661 10 + i, 10 + i);
2663 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2666 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2667 encoding. */
2669 static void
2670 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2672 /* EBCDIC support requires iconv. */
2673 if (!HAVE_ICONV)
2674 return;
2676 /* Digits 0-9 (with 0 at column 10), the simple way.
2677 ....................000000000.11111111112.2222222223333333333
2678 ....................123456789.01234567890.1234567890123456789
2679 We add a trailing comment to ensure that we correctly locate
2680 the end of the string literal token. */
2681 const char *content = " \"0123456789\" /* not a string */\n";
2682 ebcdic_execution_charset use_ebcdic;
2683 lexer_test test (case_, content, &use_ebcdic);
2685 /* Verify that we get the expected token back, with the correct
2686 location information. */
2687 const cpp_token *tok = test.get_token ();
2688 ASSERT_EQ (tok->type, CPP_STRING);
2689 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2690 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2692 /* At this point in lexing, the quote characters are treated as part of
2693 the string (they are stripped off by cpp_interpret_string). */
2695 ASSERT_EQ (tok->val.str.len, 12);
2697 /* The remainder of the test requires an iconv implementation that
2698 can convert from UTF-8 to the EBCDIC encoding requested above. */
2699 if (use_ebcdic.iconv_errors_occurred_p ())
2700 return;
2702 /* Verify that cpp_interpret_string works. */
2703 cpp_string dst_string;
2704 const enum cpp_ttype type = CPP_STRING;
2705 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2706 &dst_string, type);
2707 ASSERT_TRUE (result);
2708 /* We should now have EBCDIC-encoded text, specifically
2709 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2710 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2711 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2712 (const char *)dst_string.text);
2713 free (const_cast <unsigned char *> (dst_string.text));
2715 /* Verify that we don't attempt to record substring location information
2716 for such cases. */
2717 ASSERT_HAS_NO_SUBSTRING_RANGES
2718 (test, tok->src_loc, type,
2719 "execution character set != source character set");
2722 /* Lex a string literal containing a hex-escaped character.
2723 Verify the substring location data, before and after running
2724 cpp_interpret_string on it. */
2726 static void
2727 test_lexer_string_locations_hex (const line_table_case &case_)
2729 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2730 and with a space in place of digit 6, to terminate the escaped
2731 hex code.
2732 ....................000000000.111111.11112222.
2733 ....................123456789.012345.67890123. */
2734 const char *content = " \"01234\\x35 789\"\n";
2735 lexer_test test (case_, content, NULL);
2737 /* Verify that we get the expected token back, with the correct
2738 location information. */
2739 const cpp_token *tok = test.get_token ();
2740 ASSERT_EQ (tok->type, CPP_STRING);
2741 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2742 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2744 /* At this point in lexing, the quote characters are treated as part of
2745 the string (they are stripped off by cpp_interpret_string). */
2746 ASSERT_EQ (tok->val.str.len, 15);
2748 /* Verify that cpp_interpret_string works. */
2749 cpp_string dst_string;
2750 const enum cpp_ttype type = CPP_STRING;
2751 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2752 &dst_string, type);
2753 ASSERT_TRUE (result);
2754 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2755 free (const_cast <unsigned char *> (dst_string.text));
2757 /* Verify ranges of individual characters. This no longer includes the
2758 opening quote, but does include the closing quote. */
2759 for (int i = 0; i <= 4; i++)
2760 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2761 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2762 for (int i = 6; i <= 10; i++)
2763 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2765 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2768 /* Lex a string literal containing an octal-escaped character.
2769 Verify the substring location data after running cpp_interpret_string
2770 on it. */
2772 static void
2773 test_lexer_string_locations_oct (const line_table_case &case_)
2775 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2776 and with a space in place of digit 6, to terminate the escaped
2777 octal code.
2778 ....................000000000.111111.11112222.2222223333333333444
2779 ....................123456789.012345.67890123.4567890123456789012 */
2780 const char *content = " \"01234\\065 789\" /* not a string */\n";
2781 lexer_test test (case_, content, NULL);
2783 /* Verify that we get the expected token back, with the correct
2784 location information. */
2785 const cpp_token *tok = test.get_token ();
2786 ASSERT_EQ (tok->type, CPP_STRING);
2787 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2789 /* Verify that cpp_interpret_string works. */
2790 cpp_string dst_string;
2791 const enum cpp_ttype type = CPP_STRING;
2792 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2793 &dst_string, type);
2794 ASSERT_TRUE (result);
2795 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2796 free (const_cast <unsigned char *> (dst_string.text));
2798 /* Verify ranges of individual characters. This no longer includes the
2799 opening quote, but does include the closing quote. */
2800 for (int i = 0; i < 5; i++)
2801 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2802 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2803 for (int i = 6; i <= 10; i++)
2804 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2806 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2809 /* Test of string literal containing letter escapes. */
2811 static void
2812 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2814 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2815 .....................000000000.1.11111.1.1.11222.22222223333333
2816 .....................123456789.0.12345.6.7.89012.34567890123456. */
2817 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2818 lexer_test test (case_, content, NULL);
2820 /* Verify that we get the expected tokens back. */
2821 const cpp_token *tok = test.get_token ();
2822 ASSERT_EQ (tok->type, CPP_STRING);
2823 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2825 /* Verify ranges of individual characters. */
2826 /* "\t". */
2827 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2828 0, 1, 10, 11);
2829 /* "foo". */
2830 for (int i = 1; i <= 3; i++)
2831 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2832 i, 1, 11 + i, 11 + i);
2833 /* "\\" and "\n". */
2834 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2835 4, 1, 15, 16);
2836 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2837 5, 1, 17, 18);
2839 /* "bar" and closing quote for nul-terminator. */
2840 for (int i = 6; i <= 9; i++)
2841 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2842 i, 1, 13 + i, 13 + i);
2844 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2847 /* Another test of a string literal containing a letter escape.
2848 Based on string seen in
2849 printf ("%-%\n");
2850 in gcc.dg/format/c90-printf-1.c. */
2852 static void
2853 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2855 /* .....................000000000.1111.11.1111.22222222223.
2856 .....................123456789.0123.45.6789.01234567890. */
2857 const char *content = (" \"%-%\\n\" /* non-str */\n");
2858 lexer_test test (case_, content, NULL);
2860 /* Verify that we get the expected tokens back. */
2861 const cpp_token *tok = test.get_token ();
2862 ASSERT_EQ (tok->type, CPP_STRING);
2863 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2865 /* Verify ranges of individual characters. */
2866 /* "%-%". */
2867 for (int i = 0; i < 3; i++)
2868 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2869 i, 1, 10 + i, 10 + i);
2870 /* "\n". */
2871 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2872 3, 1, 13, 14);
2874 /* Closing quote for nul-terminator. */
2875 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2876 4, 1, 15, 15);
2878 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2881 /* Lex a string literal containing UCN 4 characters.
2882 Verify the substring location data after running cpp_interpret_string
2883 on it. */
2885 static void
2886 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2888 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2889 as UCN 4.
2890 ....................000000000.111111.111122.222222223.33333333344444
2891 ....................123456789.012345.678901.234567890.12345678901234 */
2892 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2893 lexer_test test (case_, content, NULL);
2895 /* Verify that we get the expected token back, with the correct
2896 location information. */
2897 const cpp_token *tok = test.get_token ();
2898 ASSERT_EQ (tok->type, CPP_STRING);
2899 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2901 /* Verify that cpp_interpret_string works.
2902 The string should be encoded in the execution character
2903 set. Assuming that is UTF-8, we should have the following:
2904 ----------- ---- ----- ------- ----------------
2905 Byte offset Byte Octal Unicode Source Column(s)
2906 ----------- ---- ----- ------- ----------------
2907 0 0x30 '0' 10
2908 1 0x31 '1' 11
2909 2 0x32 '2' 12
2910 3 0x33 '3' 13
2911 4 0x34 '4' 14
2912 5 0xE2 \342 U+2174 15-20
2913 6 0x85 \205 (cont) 15-20
2914 7 0xB4 \264 (cont) 15-20
2915 8 0xE2 \342 U+2175 21-26
2916 9 0x85 \205 (cont) 21-26
2917 10 0xB5 \265 (cont) 21-26
2918 11 0x37 '7' 27
2919 12 0x38 '8' 28
2920 13 0x39 '9' 29
2921 14 0x00 30 (closing quote)
2922 ----------- ---- ----- ------- ---------------. */
2924 cpp_string dst_string;
2925 const enum cpp_ttype type = CPP_STRING;
2926 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2927 &dst_string, type);
2928 ASSERT_TRUE (result);
2929 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2930 (const char *)dst_string.text);
2931 free (const_cast <unsigned char *> (dst_string.text));
2933 /* Verify ranges of individual characters. This no longer includes the
2934 opening quote, but does include the closing quote.
2935 '01234'. */
2936 for (int i = 0; i <= 4; i++)
2937 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2938 /* U+2174. */
2939 for (int i = 5; i <= 7; i++)
2940 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2941 /* U+2175. */
2942 for (int i = 8; i <= 10; i++)
2943 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2944 /* '789' and nul terminator */
2945 for (int i = 11; i <= 14; i++)
2946 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2948 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2951 /* Lex a string literal containing UCN 8 characters.
2952 Verify the substring location data after running cpp_interpret_string
2953 on it. */
2955 static void
2956 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2958 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2959 ....................000000000.111111.1111222222.2222333333333.344444
2960 ....................123456789.012345.6789012345.6789012345678.901234 */
2961 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2962 lexer_test test (case_, content, NULL);
2964 /* Verify that we get the expected token back, with the correct
2965 location information. */
2966 const cpp_token *tok = test.get_token ();
2967 ASSERT_EQ (tok->type, CPP_STRING);
2968 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2969 "\"01234\\U00002174\\U00002175789\"");
2971 /* Verify that cpp_interpret_string works.
2972 The UTF-8 encoding of the string is identical to that from
2973 the ucn4 testcase above; the only difference is the column
2974 locations. */
2975 cpp_string dst_string;
2976 const enum cpp_ttype type = CPP_STRING;
2977 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2978 &dst_string, type);
2979 ASSERT_TRUE (result);
2980 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2981 (const char *)dst_string.text);
2982 free (const_cast <unsigned char *> (dst_string.text));
2984 /* Verify ranges of individual characters. This no longer includes the
2985 opening quote, but does include the closing quote.
2986 '01234'. */
2987 for (int i = 0; i <= 4; i++)
2988 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2989 /* U+2174. */
2990 for (int i = 5; i <= 7; i++)
2991 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2992 /* U+2175. */
2993 for (int i = 8; i <= 10; i++)
2994 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2995 /* '789' at columns 35-37 */
2996 for (int i = 11; i <= 13; i++)
2997 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2998 /* Closing quote/nul-terminator at column 38. */
2999 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
3001 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
/* Fetch a big-endian 32-bit value and convert to host endianness.
   PTR_BE_VALUE points at four bytes stored most-significant byte first;
   they are read one byte at a time, so the result does not depend on
   the host's byte order.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *buf = (const unsigned char *)ptr_be_value;
  return (((uint32_t) buf[0] << 24)
          | ((uint32_t) buf[1] << 16)
          | ((uint32_t) buf[2] << 8)
          | (uint32_t) buf[3]);
}
3016 /* Lex a wide string literal and verify that attempts to read substring
3017 location data from it fail gracefully. */
3019 static void
3020 test_lexer_string_locations_wide_string (const line_table_case &case_)
3022 /* Digits 0-9.
3023 ....................000000000.11111111112.22222222233333
3024 ....................123456789.01234567890.12345678901234 */
3025 const char *content = " L\"0123456789\" /* non-str */\n";
3026 lexer_test test (case_, content, NULL);
3028 /* Verify that we get the expected token back, with the correct
3029 location information. */
3030 const cpp_token *tok = test.get_token ();
3031 ASSERT_EQ (tok->type, CPP_WSTRING);
3032 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
3034 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
3035 cpp_string dst_string;
3036 const enum cpp_ttype type = CPP_WSTRING;
3037 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3038 &dst_string, type);
3039 ASSERT_TRUE (result);
3040 /* The cpp_reader defaults to big-endian with
3041 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
3042 now be encoded as UTF-32BE. */
3043 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3044 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3045 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3046 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3047 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3048 free (const_cast <unsigned char *> (dst_string.text));
3050 /* We don't yet support generating substring location information
3051 for L"" strings. */
3052 ASSERT_HAS_NO_SUBSTRING_RANGES
3053 (test, tok->src_loc, type,
3054 "execution character set != source character set");
/* Fetch a big-endian 16-bit value and convert to host endianness.
   PTR_BE_VALUE points at two bytes stored most-significant byte first;
   they are read one byte at a time, so the result does not depend on
   the host's byte order.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *buf = (const unsigned char *)ptr_be_value;
  return ((uint16_t) buf[0] << 8) | (uint16_t) buf[1];
}
3066 /* Lex a u"" string literal and verify that attempts to read substring
3067 location data from it fail gracefully. */
3069 static void
3070 test_lexer_string_locations_string16 (const line_table_case &case_)
3072 /* Digits 0-9.
3073 ....................000000000.11111111112.22222222233333
3074 ....................123456789.01234567890.12345678901234 */
3075 const char *content = " u\"0123456789\" /* non-str */\n";
3076 lexer_test test (case_, content, NULL);
3078 /* Verify that we get the expected token back, with the correct
3079 location information. */
3080 const cpp_token *tok = test.get_token ();
3081 ASSERT_EQ (tok->type, CPP_STRING16);
3082 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
3084 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
3085 cpp_string dst_string;
3086 const enum cpp_ttype type = CPP_STRING16;
3087 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3088 &dst_string, type);
3089 ASSERT_TRUE (result);
3091 /* The cpp_reader defaults to big-endian, so dst_string should
3092 now be encoded as UTF-16BE. */
3093 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
3094 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
3095 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
3096 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
3097 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
3098 free (const_cast <unsigned char *> (dst_string.text));
3100 /* We don't yet support generating substring location information
3101 for L"" strings. */
3102 ASSERT_HAS_NO_SUBSTRING_RANGES
3103 (test, tok->src_loc, type,
3104 "execution character set != source character set");
3107 /* Lex a U"" string literal and verify that attempts to read substring
3108 location data from it fail gracefully. */
3110 static void
3111 test_lexer_string_locations_string32 (const line_table_case &case_)
3113 /* Digits 0-9.
3114 ....................000000000.11111111112.22222222233333
3115 ....................123456789.01234567890.12345678901234 */
3116 const char *content = " U\"0123456789\" /* non-str */\n";
3117 lexer_test test (case_, content, NULL);
3119 /* Verify that we get the expected token back, with the correct
3120 location information. */
3121 const cpp_token *tok = test.get_token ();
3122 ASSERT_EQ (tok->type, CPP_STRING32);
3123 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
3125 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
3126 cpp_string dst_string;
3127 const enum cpp_ttype type = CPP_STRING32;
3128 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3129 &dst_string, type);
3130 ASSERT_TRUE (result);
3132 /* The cpp_reader defaults to big-endian, so dst_string should
3133 now be encoded as UTF-32BE. */
3134 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3135 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3136 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3137 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3138 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3139 free (const_cast <unsigned char *> (dst_string.text));
3141 /* We don't yet support generating substring location information
3142 for L"" strings. */
3143 ASSERT_HAS_NO_SUBSTRING_RANGES
3144 (test, tok->src_loc, type,
3145 "execution character set != source character set");
3148 /* Lex a u8-string literal.
3149 Verify the substring location data after running cpp_interpret_string
3150 on it. */
3152 static void
3153 test_lexer_string_locations_u8 (const line_table_case &case_)
3155 /* Digits 0-9.
3156 ....................000000000.11111111112.22222222233333
3157 ....................123456789.01234567890.12345678901234 */
3158 const char *content = " u8\"0123456789\" /* non-str */\n";
3159 lexer_test test (case_, content, NULL);
3161 /* Verify that we get the expected token back, with the correct
3162 location information. */
3163 const cpp_token *tok = test.get_token ();
3164 ASSERT_EQ (tok->type, CPP_UTF8STRING);
3165 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
3167 /* Verify that cpp_interpret_string works. */
3168 cpp_string dst_string;
3169 const enum cpp_ttype type = CPP_STRING;
3170 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3171 &dst_string, type);
3172 ASSERT_TRUE (result);
3173 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3174 free (const_cast <unsigned char *> (dst_string.text));
3176 /* Verify ranges of individual characters. This no longer includes the
3177 opening quote, but does include the closing quote. */
3178 for (int i = 0; i <= 10; i++)
3179 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3182 /* Lex a string literal containing UTF-8 source characters.
3183 Verify the substring location data after running cpp_interpret_string
3184 on it. */
3186 static void
3187 test_lexer_string_locations_utf8_source (const line_table_case &case_)
3189 /* This string literal is written out to the source file as UTF-8,
3190 and is of the form "before mojibake after", where "mojibake"
3191 is written as the following four unicode code points:
3192 U+6587 CJK UNIFIED IDEOGRAPH-6587
3193 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3194 U+5316 CJK UNIFIED IDEOGRAPH-5316
3195 U+3051 HIRAGANA LETTER KE.
3196 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3197 "before" and "after" are 1 byte per unicode character.
3199 The numbering shown are "columns", which are *byte* numbers within
3200 the line, rather than unicode character numbers.
3202 .................... 000000000.1111111.
3203 .................... 123456789.0123456. */
3204 const char *content = (" \"before "
3205 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3206 UTF-8: 0xE6 0x96 0x87
3207 C octal escaped UTF-8: \346\226\207
3208 "column" numbers: 17-19. */
3209 "\346\226\207"
3211 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3212 UTF-8: 0xE5 0xAD 0x97
3213 C octal escaped UTF-8: \345\255\227
3214 "column" numbers: 20-22. */
3215 "\345\255\227"
3217 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3218 UTF-8: 0xE5 0x8C 0x96
3219 C octal escaped UTF-8: \345\214\226
3220 "column" numbers: 23-25. */
3221 "\345\214\226"
3223 /* U+3051 HIRAGANA LETTER KE
3224 UTF-8: 0xE3 0x81 0x91
3225 C octal escaped UTF-8: \343\201\221
3226 "column" numbers: 26-28. */
3227 "\343\201\221"
3229 /* column numbers 29 onwards
3230 2333333.33334444444444
3231 9012345.67890123456789. */
3232 " after\" /* non-str */\n");
3233 lexer_test test (case_, content, NULL);
3235 /* Verify that we get the expected token back, with the correct
3236 location information. */
3237 const cpp_token *tok = test.get_token ();
3238 ASSERT_EQ (tok->type, CPP_STRING);
3239 ASSERT_TOKEN_AS_TEXT_EQ
3240 (test.m_parser, tok,
3241 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3243 /* Verify that cpp_interpret_string works. */
3244 cpp_string dst_string;
3245 const enum cpp_ttype type = CPP_STRING;
3246 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3247 &dst_string, type);
3248 ASSERT_TRUE (result);
3249 ASSERT_STREQ
3250 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3251 (const char *)dst_string.text);
3252 free (const_cast <unsigned char *> (dst_string.text));
3254 /* Verify ranges of individual characters. This no longer includes the
3255 opening quote, but does include the closing quote.
3256 Assuming that both source and execution encodings are UTF-8, we have
3257 a run of 25 octets in each, plus the NUL terminator. */
3258 for (int i = 0; i < 25; i++)
3259 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3260 /* NUL-terminator should use the closing quote at column 35. */
3261 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3263 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3266 /* Test of string literal concatenation. */
3268 static void
3269 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3271 /* Digits 0-9.
3272 .....................000000000.111111.11112222222222
3273 .....................123456789.012345.67890123456789. */
3274 const char *content = (" \"01234\" /* non-str */\n"
3275 " \"56789\" /* non-str */\n");
3276 lexer_test test (case_, content, NULL);
3278 location_t input_locs[2];
3280 /* Verify that we get the expected tokens back. */
3281 auto_vec <cpp_string> input_strings;
3282 const cpp_token *tok_a = test.get_token ();
3283 ASSERT_EQ (tok_a->type, CPP_STRING);
3284 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3285 input_strings.safe_push (tok_a->val.str);
3286 input_locs[0] = tok_a->src_loc;
3288 const cpp_token *tok_b = test.get_token ();
3289 ASSERT_EQ (tok_b->type, CPP_STRING);
3290 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3291 input_strings.safe_push (tok_b->val.str);
3292 input_locs[1] = tok_b->src_loc;
3294 /* Verify that cpp_interpret_string works. */
3295 cpp_string dst_string;
3296 const enum cpp_ttype type = CPP_STRING;
3297 bool result = cpp_interpret_string (test.m_parser,
3298 input_strings.address (), 2,
3299 &dst_string, type);
3300 ASSERT_TRUE (result);
3301 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3302 free (const_cast <unsigned char *> (dst_string.text));
3304 /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3305 test.m_concats.record_string_concatenation (2, input_locs);
3307 location_t initial_loc = input_locs[0];
3309 /* "01234" on line 1. */
3310 for (int i = 0; i <= 4; i++)
3311 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3312 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3313 for (int i = 5; i <= 10; i++)
3314 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3316 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3319 /* Another test of string literal concatenation. */
3321 static void
3322 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3324 /* Digits 0-9.
3325 .....................000000000.111.11111112222222
3326 .....................123456789.012.34567890123456. */
3327 const char *content = (" \"01\" /* non-str */\n"
3328 " \"23\" /* non-str */\n"
3329 " \"45\" /* non-str */\n"
3330 " \"67\" /* non-str */\n"
3331 " \"89\" /* non-str */\n");
3332 lexer_test test (case_, content, NULL);
3334 auto_vec <cpp_string> input_strings;
3335 location_t input_locs[5];
3337 /* Verify that we get the expected tokens back. */
3338 for (int i = 0; i < 5; i++)
3340 const cpp_token *tok = test.get_token ();
3341 ASSERT_EQ (tok->type, CPP_STRING);
3342 input_strings.safe_push (tok->val.str);
3343 input_locs[i] = tok->src_loc;
3346 /* Verify that cpp_interpret_string works. */
3347 cpp_string dst_string;
3348 const enum cpp_ttype type = CPP_STRING;
3349 bool result = cpp_interpret_string (test.m_parser,
3350 input_strings.address (), 5,
3351 &dst_string, type);
3352 ASSERT_TRUE (result);
3353 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3354 free (const_cast <unsigned char *> (dst_string.text));
3356 /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3357 test.m_concats.record_string_concatenation (5, input_locs);
3359 location_t initial_loc = input_locs[0];
3361 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3362 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3363 and expect get_source_range_for_substring to fail.
3364 However, for a string concatenation test, we can have a case
3365 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3366 but subsequent strings can be after it.
3367 Attempting to detect this within assert_char_at_range
3368 would overcomplicate the logic for the common test cases, so
3369 we detect it here. */
3370 if (should_have_column_data_p (input_locs[0])
3371 && !should_have_column_data_p (input_locs[4]))
3373 /* Verify that get_source_range_for_substring gracefully rejects
3374 this case. */
3375 source_range actual_range;
3376 const char *err
3377 = get_source_range_for_char (test.m_parser, &test.m_concats,
3378 initial_loc, type, 0, &actual_range);
3379 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3380 return;
3383 for (int i = 0; i < 5; i++)
3384 for (int j = 0; j < 2; j++)
3385 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3386 i + 1, 10 + j, 10 + j);
3388 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3389 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3391 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3394 /* Another test of string literal concatenation, this time combined with
3395 various kinds of escaped characters. */
3397 static void
3398 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3400 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3401 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3402 const char *content
3403 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3404 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3405 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3406 lexer_test test (case_, content, NULL);
3408 auto_vec <cpp_string> input_strings;
3409 location_t input_locs[4];
3411 /* Verify that we get the expected tokens back. */
3412 for (int i = 0; i < 4; i++)
3414 const cpp_token *tok = test.get_token ();
3415 ASSERT_EQ (tok->type, CPP_STRING);
3416 input_strings.safe_push (tok->val.str);
3417 input_locs[i] = tok->src_loc;
3420 /* Verify that cpp_interpret_string works. */
3421 cpp_string dst_string;
3422 const enum cpp_ttype type = CPP_STRING;
3423 bool result = cpp_interpret_string (test.m_parser,
3424 input_strings.address (), 4,
3425 &dst_string, type);
3426 ASSERT_TRUE (result);
3427 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3428 free (const_cast <unsigned char *> (dst_string.text));
3430 /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3431 test.m_concats.record_string_concatenation (4, input_locs);
3433 location_t initial_loc = input_locs[0];
3435 for (int i = 0; i <= 4; i++)
3436 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3437 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3438 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3439 for (int i = 7; i <= 9; i++)
3440 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3442 /* NUL-terminator should use the location of the final closing quote. */
3443 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3445 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3448 /* Test of string literal in a macro. */
3450 static void
3451 test_lexer_string_locations_macro (const line_table_case &case_)
3453 /* Digits 0-9.
3454 .....................0000000001111111111.22222222223.
3455 .....................1234567890123456789.01234567890. */
3456 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3457 " MACRO");
3458 lexer_test test (case_, content, NULL);
3460 /* Verify that we get the expected tokens back. */
3461 const cpp_token *tok = test.get_token ();
3462 ASSERT_EQ (tok->type, CPP_PADDING);
3464 tok = test.get_token ();
3465 ASSERT_EQ (tok->type, CPP_STRING);
3466 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3468 /* Verify ranges of individual characters. We ought to
3469 see columns within the macro definition. */
3470 for (int i = 0; i <= 10; i++)
3471 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3472 i, 1, 20 + i, 20 + i);
3474 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3476 tok = test.get_token ();
3477 ASSERT_EQ (tok->type, CPP_PADDING);
3480 /* Test of stringification of a macro argument. */
3482 static void
3483 test_lexer_string_locations_stringified_macro_argument
3484 (const line_table_case &case_)
3486 /* .....................000000000111111111122222222223.
3487 .....................123456789012345678901234567890. */
3488 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3489 "MACRO(foo)\n");
3490 lexer_test test (case_, content, NULL);
3492 /* Verify that we get the expected token back. */
3493 const cpp_token *tok = test.get_token ();
3494 ASSERT_EQ (tok->type, CPP_PADDING);
3496 tok = test.get_token ();
3497 ASSERT_EQ (tok->type, CPP_STRING);
3498 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3500 /* We don't support getting the location of a stringified macro
3501 argument. Verify that it fails gracefully. */
3502 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3503 "cpp_interpret_string_1 failed");
3505 tok = test.get_token ();
3506 ASSERT_EQ (tok->type, CPP_PADDING);
3508 tok = test.get_token ();
3509 ASSERT_EQ (tok->type, CPP_PADDING);
3512 /* Ensure that we are fail gracefully if something attempts to pass
3513 in a location that isn't a string literal token. Seen on this code:
3515 const char a[] = " %d ";
3516 __builtin_printf (a, 0.5);
3519 when c-format.cc erroneously used the indicated one-character
3520 location as the format string location, leading to a read past the
3521 end of a string buffer in cpp_interpret_string_1. */
3523 static void
3524 test_lexer_string_locations_non_string (const line_table_case &case_)
3526 /* .....................000000000111111111122222222223.
3527 .....................123456789012345678901234567890. */
3528 const char *content = (" a\n");
3529 lexer_test test (case_, content, NULL);
3531 /* Verify that we get the expected token back. */
3532 const cpp_token *tok = test.get_token ();
3533 ASSERT_EQ (tok->type, CPP_NAME);
3534 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3536 /* At this point, libcpp is attempting to interpret the name as a
3537 string literal, despite it not starting with a quote. We don't detect
3538 that, but we should at least fail gracefully. */
3539 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3540 "cpp_interpret_string_1 failed");
3543 /* Ensure that we can read substring information for a token which
3544 starts in one linemap and ends in another . Adapted from
3545 gcc.dg/cpp/pr69985.c. */
3547 static void
3548 test_lexer_string_locations_long_line (const line_table_case &case_)
3550 /* .....................000000.000111111111
3551 .....................123456.789012346789. */
3552 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3553 " \"0123456789012345678901234567890123456789"
3554 "0123456789012345678901234567890123456789"
3555 "0123456789012345678901234567890123456789"
3556 "0123456789\"\n");
3558 lexer_test test (case_, content, NULL);
3560 /* Verify that we get the expected token back. */
3561 const cpp_token *tok = test.get_token ();
3562 ASSERT_EQ (tok->type, CPP_STRING);
3564 if (!should_have_column_data_p (line_table->highest_location))
3565 return;
3567 /* Verify ranges of individual characters. */
3568 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3569 for (int i = 0; i < 131; i++)
3570 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3571 i, 2, 7 + i, 7 + i);
3574 /* Test of locations within a raw string that doesn't contain a newline. */
3576 static void
3577 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3579 /* .....................00.0000000111111111122.
3580 .....................12.3456789012345678901. */
3581 const char *content = ("R\"foo(0123456789)foo\"\n");
3582 lexer_test test (case_, content, NULL);
3584 /* Verify that we get the expected token back. */
3585 const cpp_token *tok = test.get_token ();
3586 ASSERT_EQ (tok->type, CPP_STRING);
3588 /* Verify that cpp_interpret_string works. */
3589 cpp_string dst_string;
3590 const enum cpp_ttype type = CPP_STRING;
3591 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3592 &dst_string, type);
3593 ASSERT_TRUE (result);
3594 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3595 free (const_cast <unsigned char *> (dst_string.text));
3597 if (!should_have_column_data_p (line_table->highest_location))
3598 return;
3600 /* 0-9, plus the nil terminator. */
3601 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3602 for (int i = 0; i < 11; i++)
3603 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3604 i, 1, 7 + i, 7 + i);
3607 /* Test of locations within a raw string that contains a newline. */
3609 static void
3610 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3612 /* .....................00.0000.
3613 .....................12.3456. */
3614 const char *content = ("R\"foo(\n"
3615 /* .....................00000.
3616 .....................12345. */
3617 "hello\n"
3618 "world\n"
3619 /* .....................00000.
3620 .....................12345. */
3621 ")foo\"\n");
3622 lexer_test test (case_, content, NULL);
3624 /* Verify that we get the expected token back. */
3625 const cpp_token *tok = test.get_token ();
3626 ASSERT_EQ (tok->type, CPP_STRING);
3628 /* Verify that cpp_interpret_string works. */
3629 cpp_string dst_string;
3630 const enum cpp_ttype type = CPP_STRING;
3631 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3632 &dst_string, type);
3633 ASSERT_TRUE (result);
3634 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3635 free (const_cast <unsigned char *> (dst_string.text));
3637 if (!should_have_column_data_p (line_table->highest_location))
3638 return;
3640 /* Currently we don't support locations within raw strings that
3641 contain newlines. */
3642 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3643 "range endpoints are on different lines");
3646 /* Test of parsing an unterminated raw string. */
3648 static void
3649 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3651 const char *content = "R\"ouch()ouCh\" /* etc */";
3653 lexer_diagnostic_sink diagnostics;
3654 lexer_test test (case_, content, &diagnostics);
3655 test.m_implicitly_expect_EOF = false;
3657 /* Attempt to parse the raw string. */
3658 const cpp_token *tok = test.get_token ();
3659 ASSERT_EQ (tok->type, CPP_EOF);
3661 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3662 /* We expect the message "unterminated raw string"
3663 in the "cpplib" translation domain.
3664 It's not clear that dgettext is available on all supported hosts,
3665 so this assertion is commented-out for now.
3666 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3667 diagnostics.m_diagnostics[0]);
3671 /* Test of lexing char constants. */
3673 static void
3674 test_lexer_char_constants (const line_table_case &case_)
3676 /* Various char constants.
3677 .....................0000000001111111111.22222222223.
3678 .....................1234567890123456789.01234567890. */
3679 const char *content = (" 'a'\n"
3680 " u'a'\n"
3681 " U'a'\n"
3682 " L'a'\n"
3683 " 'abc'\n");
3684 lexer_test test (case_, content, NULL);
3686 /* Verify that we get the expected tokens back. */
3687 /* 'a'. */
3688 const cpp_token *tok = test.get_token ();
3689 ASSERT_EQ (tok->type, CPP_CHAR);
3690 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3692 unsigned int chars_seen;
3693 int unsignedp;
3694 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3695 &chars_seen, &unsignedp);
3696 ASSERT_EQ (cc, 'a');
3697 ASSERT_EQ (chars_seen, 1);
3699 /* u'a'. */
3700 tok = test.get_token ();
3701 ASSERT_EQ (tok->type, CPP_CHAR16);
3702 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3704 /* U'a'. */
3705 tok = test.get_token ();
3706 ASSERT_EQ (tok->type, CPP_CHAR32);
3707 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3709 /* L'a'. */
3710 tok = test.get_token ();
3711 ASSERT_EQ (tok->type, CPP_WCHAR);
3712 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3714 /* 'abc' (c-char-sequence). */
3715 tok = test.get_token ();
3716 ASSERT_EQ (tok->type, CPP_CHAR);
3717 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3719 /* A table of interesting location_t values, giving one axis of our test
3720 matrix. */
3722 static const location_t boundary_locations[] = {
3723 /* Zero means "don't override the default values for a new line_table". */
3726 /* An arbitrary non-zero value that isn't close to one of
3727 the boundary values below. */
3728 0x10000,
3730 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3731 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3732 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3733 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3734 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3735 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3737 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3738 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3739 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3740 LINE_MAP_MAX_LOCATION_WITH_COLS,
3741 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3742 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3745 /* Run TESTCASE multiple times, once for each case in our test matrix. */
3747 void
3748 for_each_line_table_case (void (*testcase) (const line_table_case &))
3750 /* As noted above in the description of struct line_table_case,
3751 we want to explore a test matrix of interesting line_table
3752 situations, running various selftests for each case within the
3753 matrix. */
3755 /* Run all tests with:
3756 (a) line_table->default_range_bits == 0, and
3757 (b) line_table->default_range_bits == 5. */
3758 int num_cases_tested = 0;
3759 for (int default_range_bits = 0; default_range_bits <= 5;
3760 default_range_bits += 5)
3762 /* ...and use each of the "interesting" location values as
3763 the starting location within line_table. */
3764 const int num_boundary_locations = ARRAY_SIZE (boundary_locations);
3765 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3767 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3769 testcase (c);
3771 num_cases_tested++;
3775 /* Verify that we fully covered the test matrix. */
3776 ASSERT_EQ (num_cases_tested, 2 * 12);
3779 /* Verify that when presented with a consecutive pair of locations with
3780 a very large line offset, we don't attempt to consolidate them into
3781 a single ordinary linemap where the line offsets within the line map
3782 would lead to overflow (PR lto/88147). */
3784 static void
3785 test_line_offset_overflow ()
3787 line_table_test ltt (line_table_case (5, 0));
3789 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3790 linemap_line_start (line_table, 1, 100);
3791 location_t loc_a = linemap_line_start (line_table, 2578, 255);
3792 assert_loceq ("foo.c", 2578, 0, loc_a);
3794 const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3795 ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3796 ASSERT_EQ (ordmap_a->m_range_bits, 5);
3798 location_t loc_b = linemap_line_start (line_table, 404198, 512);
3799 assert_loceq ("foo.c", 404198, 0, loc_b);
3801 /* We should have started a new linemap, rather than attempting to store
3802 a very large line offset. */
3803 const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3804 ASSERT_NE (ordmap_a, ordmap_b);
3807 void test_cpp_utf8 ()
3809 const int def_tabstop = 8;
3810 cpp_char_column_policy policy (def_tabstop, cpp_wcwidth);
3812 /* Verify that wcwidth of invalid UTF-8 or control bytes is 1. */
3814 int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, policy);
3815 ASSERT_EQ (8, w_bad);
3816 int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, policy);
3817 ASSERT_EQ (5, w_ctrl);
3820 /* Verify that wcwidth of valid UTF-8 is as expected. */
3822 const int w_pi = cpp_display_width ("\xcf\x80", 2, policy);
3823 ASSERT_EQ (1, w_pi);
3824 const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, policy);
3825 ASSERT_EQ (2, w_emoji);
3826 const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
3827 policy);
3828 ASSERT_EQ (1, w_umlaut_precomposed);
3829 const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
3830 policy);
3831 ASSERT_EQ (1, w_umlaut_combining);
3832 const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, policy);
3833 ASSERT_EQ (2, w_han);
3834 const int w_ascii = cpp_display_width ("GCC", 3, policy);
3835 ASSERT_EQ (3, w_ascii);
3836 const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
3837 "\x9f! \xe4\xb8\xba y\xcc\x88",
3838 24, policy);
3839 ASSERT_EQ (18, w_mixed);
3842 /* Verify that display width properly expands tabs. */
3844 const char *tstr = "\tabc\td";
3845 ASSERT_EQ (6, cpp_display_width (tstr, 6,
3846 cpp_char_column_policy (1, cpp_wcwidth)));
3847 ASSERT_EQ (10, cpp_display_width (tstr, 6,
3848 cpp_char_column_policy (3, cpp_wcwidth)));
3849 ASSERT_EQ (17, cpp_display_width (tstr, 6,
3850 cpp_char_column_policy (8, cpp_wcwidth)));
3851 ASSERT_EQ (1,
3852 cpp_display_column_to_byte_column
3853 (tstr, 6, 7, cpp_char_column_policy (8, cpp_wcwidth)));
3856 /* Verify that cpp_byte_column_to_display_column can go past the end,
3857 and similar edge cases. */
3859 const char *str
3860 /* Display columns.
3861 111111112345 */
3862 = "\xcf\x80 abc";
3863 /* 111122223456
3864 Byte columns. */
3866 ASSERT_EQ (5, cpp_display_width (str, 6, policy));
3867 ASSERT_EQ (105,
3868 cpp_byte_column_to_display_column (str, 6, 106, policy));
3869 ASSERT_EQ (10000,
3870 cpp_byte_column_to_display_column (NULL, 0, 10000, policy));
3871 ASSERT_EQ (0,
3872 cpp_byte_column_to_display_column (NULL, 10000, 0, policy));
3875 /* Verify that cpp_display_column_to_byte_column can go past the end,
3876 and similar edge cases, and check invertibility. */
3878 const char *str
3879 /* Display columns.
3880 000000000000000000000000000000000000011
3881 111111112222222234444444455555555678901 */
3882 = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
3883 /* 000000000000000000000000000000000111111
3884 111122223333444456666777788889999012345
3885 Byte columns. */
3886 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, policy));
3887 ASSERT_EQ (15,
3888 cpp_display_column_to_byte_column (str, 15, 11, policy));
3889 ASSERT_EQ (115,
3890 cpp_display_column_to_byte_column (str, 15, 111, policy));
3891 ASSERT_EQ (10000,
3892 cpp_display_column_to_byte_column (NULL, 0, 10000, policy));
3893 ASSERT_EQ (0,
3894 cpp_display_column_to_byte_column (NULL, 10000, 0, policy));
3896 /* Verify that we do not interrupt a UTF-8 sequence. */
3897 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, policy));
3899 for (int byte_col = 1; byte_col <= 15; ++byte_col)
3901 const int disp_col
3902 = cpp_byte_column_to_display_column (str, 15, byte_col, policy);
3903 const int byte_col2
3904 = cpp_display_column_to_byte_column (str, 15, disp_col, policy);
3906 /* If we ask for the display column in the middle of a UTF-8
3907 sequence, it will return the length of the partial sequence,
3908 matching the behavior of GCC before display column support.
3909 Otherwise check the round trip was successful. */
3910 if (byte_col < 4)
3911 ASSERT_EQ (byte_col, disp_col);
3912 else if (byte_col >= 6 && byte_col < 9)
3913 ASSERT_EQ (3 + (byte_col - 5), disp_col);
3914 else
3915 ASSERT_EQ (byte_col2, byte_col);
3921 /* Run all of the selftests within this file. */
3923 void
3924 input_cc_tests ()
3926 test_linenum_comparisons ();
3927 test_should_have_column_data_p ();
3928 test_unknown_location ();
3929 test_builtins ();
3930 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3932 for_each_line_table_case (test_accessing_ordinary_linemaps);
3933 for_each_line_table_case (test_lexer);
3934 for_each_line_table_case (test_lexer_string_locations_simple);
3935 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3936 for_each_line_table_case (test_lexer_string_locations_hex);
3937 for_each_line_table_case (test_lexer_string_locations_oct);
3938 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3939 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3940 for_each_line_table_case (test_lexer_string_locations_ucn4);
3941 for_each_line_table_case (test_lexer_string_locations_ucn8);
3942 for_each_line_table_case (test_lexer_string_locations_wide_string);
3943 for_each_line_table_case (test_lexer_string_locations_string16);
3944 for_each_line_table_case (test_lexer_string_locations_string32);
3945 for_each_line_table_case (test_lexer_string_locations_u8);
3946 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3947 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3948 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3949 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3950 for_each_line_table_case (test_lexer_string_locations_macro);
3951 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3952 for_each_line_table_case (test_lexer_string_locations_non_string);
3953 for_each_line_table_case (test_lexer_string_locations_long_line);
3954 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3955 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3956 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3957 for_each_line_table_case (test_lexer_char_constants);
3959 test_reading_source_line ();
3961 test_line_offset_overflow ();
3963 test_cpp_utf8 ();
3966 } // namespace selftest
3968 #endif /* CHECKING_P */