compiler: only build thunk struct type when it is needed
[official-gcc.git] / gcc / input.cc
bloba28abfac5ace7537b0f60120d0cb35fbfc4a8504
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2022 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic.h"
25 #include "selftest.h"
26 #include "cpplib.h"
28 #ifndef HAVE_ICONV
29 #define HAVE_ICONV 0
30 #endif
/* Input charset configuration.  This is the fallback charset callback,
   installed when the client does not provide one.  It answers "no charset"
   for every file, so no charset conversion is requested.  */
static const char *
default_charset_callback (const char *)
{
  return nullptr;
}
38 void
39 file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
40 bool should_skip_bom)
42 in_context.ccb = (ccb ? ccb : default_charset_callback);
43 in_context.should_skip_bom = should_skip_bom;
46 /* This is a cache used by get_next_line to store the content of a
47 file to be searched for file lines. */
48 class file_cache_slot
50 public:
51 file_cache_slot ();
52 ~file_cache_slot ();
54 bool read_line_num (size_t line_num,
55 char ** line, ssize_t *line_len);
57 /* Accessors. */
58 const char *get_file_path () const { return m_file_path; }
59 unsigned get_use_count () const { return m_use_count; }
60 bool missing_trailing_newline_p () const
62 return m_missing_trailing_newline;
65 void inc_use_count () { m_use_count++; }
67 bool create (const file_cache::input_context &in_context,
68 const char *file_path, FILE *fp, unsigned highest_use_count);
69 void evict ();
71 private:
72 /* These are information used to store a line boundary. */
73 class line_info
75 public:
76 /* The line number. It starts from 1. */
77 size_t line_num;
79 /* The position (byte count) of the beginning of the line,
80 relative to the file data pointer. This starts at zero. */
81 size_t start_pos;
83 /* The position (byte count) of the last byte of the line. This
84 normally points to the '\n' character, or to one byte after the
85 last byte of the file, if the file doesn't contain a '\n'
86 character. */
87 size_t end_pos;
89 line_info (size_t l, size_t s, size_t e)
90 : line_num (l), start_pos (s), end_pos (e)
93 line_info ()
94 :line_num (0), start_pos (0), end_pos (0)
98 bool needs_read_p () const;
99 bool needs_grow_p () const;
100 void maybe_grow ();
101 bool read_data ();
102 bool maybe_read_data ();
103 bool get_next_line (char **line, ssize_t *line_len);
104 bool read_next_line (char ** line, ssize_t *line_len);
105 bool goto_next_line ();
107 static const size_t buffer_size = 4 * 1024;
108 static const size_t line_record_size = 100;
110 /* The number of time this file has been accessed. This is used
111 to designate which file cache to evict from the cache
112 array. */
113 unsigned m_use_count;
115 /* The file_path is the key for identifying a particular file in
116 the cache.
117 For libcpp-using code, the underlying buffer for this field is
118 owned by the corresponding _cpp_file within the cpp_reader. */
119 const char *m_file_path;
121 FILE *m_fp;
123 /* This points to the content of the file that we've read so
124 far. */
125 char *m_data;
127 /* The allocated buffer to be freed may start a little earlier than DATA,
128 e.g. if a UTF8 BOM was skipped at the beginning. */
129 int m_alloc_offset;
131 /* The size of the DATA array above.*/
132 size_t m_size;
134 /* The number of bytes read from the underlying file so far. This
135 must be less (or equal) than SIZE above. */
136 size_t m_nb_read;
138 /* The index of the beginning of the current line. */
139 size_t m_line_start_idx;
141 /* The number of the previous line read. This starts at 1. Zero
142 means we've read no line so far. */
143 size_t m_line_num;
145 /* This is the total number of lines of the current file. At the
146 moment, we try to get this information from the line map
147 subsystem. Note that this is just a hint. When using the C++
148 front-end, this hint is correct because the input file is then
149 completely tokenized before parsing starts; so the line map knows
150 the number of lines before compilation really starts. For e.g,
151 the C front-end, it can happen that we start emitting diagnostics
152 before the line map has seen the end of the file. */
153 size_t m_total_lines;
155 /* Could this file be missing a trailing newline on its final line?
156 Initially true (to cope with empty files), set to true/false
157 as each line is read. */
158 bool m_missing_trailing_newline;
160 /* This is a record of the beginning and end of the lines we've seen
161 while reading the file. This is useful to avoid walking the data
162 from the beginning when we are asked to read a line that is
163 before LINE_START_IDX above. Note that the maximum size of this
164 record is line_record_size, so that the memory consumption
165 doesn't explode. We thus scale total_lines down to
166 line_record_size. */
167 vec<line_info, va_heap> m_line_record;
169 void offset_buffer (int offset)
171 gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
172 : (size_t) offset <= m_size);
173 gcc_assert (m_data);
174 m_alloc_offset += offset;
175 m_data += offset;
176 m_size -= offset;
181 /* Current position in real source file. */
183 location_t input_location = UNKNOWN_LOCATION;
185 class line_maps *line_table;
187 /* A stashed copy of "line_table" for use by selftest::line_table_test.
188 This needs to be a global so that it can be a GC root, and thus
189 prevent the stashed copy from being garbage-collected if the GC runs
190 during a line_table_test. */
192 class line_maps *saved_line_table;
194 /* Expand the source location LOC into a human readable location. If
195 LOC resolves to a builtin location, the file name of the readable
196 location is set to the string "<built-in>". If EXPANSION_POINT_P is
197 TRUE and LOC is virtual, then it is resolved to the expansion
198 point of the involved macro. Otherwise, it is resolved to the
199 spelling location of the token.
201 When resolving to the spelling location of the token, if the
202 resulting location is for a built-in location (that is, it has no
203 associated line/column) in the context of a macro expansion, the
204 returned location is the first one (while unwinding the macro
205 location towards its expansion point) that is in real source
206 code.
208 ASPECT controls which part of the location to use. */
210 static expanded_location
211 expand_location_1 (location_t loc,
212 bool expansion_point_p,
213 enum location_aspect aspect)
215 expanded_location xloc;
216 const line_map_ordinary *map;
217 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
218 tree block = NULL;
220 if (IS_ADHOC_LOC (loc))
222 block = LOCATION_BLOCK (loc);
223 loc = LOCATION_LOCUS (loc);
226 memset (&xloc, 0, sizeof (xloc));
228 if (loc >= RESERVED_LOCATION_COUNT)
230 if (!expansion_point_p)
232 /* We want to resolve LOC to its spelling location.
234 But if that spelling location is a reserved location that
235 appears in the context of a macro expansion (like for a
236 location for a built-in token), let's consider the first
237 location (toward the expansion point) that is not reserved;
238 that is, the first location that is in real source code. */
239 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
240 loc, NULL);
241 lrk = LRK_SPELLING_LOCATION;
243 loc = linemap_resolve_location (line_table, loc, lrk, &map);
245 /* loc is now either in an ordinary map, or is a reserved location.
246 If it is a compound location, the caret is in a spelling location,
247 but the start/finish might still be a virtual location.
248 Depending of what the caller asked for, we may need to recurse
249 one level in order to resolve any virtual locations in the
250 end-points. */
251 switch (aspect)
253 default:
254 gcc_unreachable ();
255 /* Fall through. */
256 case LOCATION_ASPECT_CARET:
257 break;
258 case LOCATION_ASPECT_START:
260 location_t start = get_start (loc);
261 if (start != loc)
262 return expand_location_1 (start, expansion_point_p, aspect);
264 break;
265 case LOCATION_ASPECT_FINISH:
267 location_t finish = get_finish (loc);
268 if (finish != loc)
269 return expand_location_1 (finish, expansion_point_p, aspect);
271 break;
273 xloc = linemap_expand_location (line_table, map, loc);
276 xloc.data = block;
277 if (loc <= BUILTINS_LOCATION)
278 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
280 return xloc;
283 /* Initialize the set of cache used for files accessed by caret
284 diagnostic. */
286 static void
287 diagnostic_file_cache_init (void)
289 gcc_assert (global_dc);
290 if (global_dc->m_file_cache == NULL)
291 global_dc->m_file_cache = new file_cache ();
294 /* Free the resources used by the set of cache used for files accessed
295 by caret diagnostic. */
297 void
298 diagnostic_file_cache_fini (void)
300 if (global_dc->m_file_cache)
302 delete global_dc->m_file_cache;
303 global_dc->m_file_cache = NULL;
307 /* Return the total lines number that have been read so far by the
308 line map (in the preprocessor) so far. For languages like C++ that
309 entirely preprocess the input file before starting to parse, this
310 equals the actual number of lines of the file. */
312 static size_t
313 total_lines_num (const char *file_path)
315 size_t r = 0;
316 location_t l = 0;
317 if (linemap_get_file_highest_location (line_table, file_path, &l))
319 gcc_assert (l >= RESERVED_LOCATION_COUNT);
320 expanded_location xloc = expand_location (l);
321 r = xloc.line;
323 return r;
326 /* Lookup the cache used for the content of a given file accessed by
327 caret diagnostic. Return the found cached file, or NULL if no
328 cached file was found. */
330 file_cache_slot *
331 file_cache::lookup_file (const char *file_path)
333 gcc_assert (file_path);
335 /* This will contain the found cached file. */
336 file_cache_slot *r = NULL;
337 for (unsigned i = 0; i < num_file_slots; ++i)
339 file_cache_slot *c = &m_file_slots[i];
340 if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
342 c->inc_use_count ();
343 r = c;
347 if (r)
348 r->inc_use_count ();
350 return r;
353 /* Purge any mention of FILENAME from the cache of files used for
354 printing source code. For use in selftests when working
355 with tempfiles. */
357 void
358 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
360 gcc_assert (file_path);
362 if (!global_dc->m_file_cache)
363 return;
365 global_dc->m_file_cache->forcibly_evict_file (file_path);
368 void
369 file_cache::forcibly_evict_file (const char *file_path)
371 gcc_assert (file_path);
373 file_cache_slot *r = lookup_file (file_path);
374 if (!r)
375 /* Not found. */
376 return;
378 r->evict ();
381 void
382 file_cache_slot::evict ()
384 m_file_path = NULL;
385 if (m_fp)
386 fclose (m_fp);
387 m_fp = NULL;
388 m_nb_read = 0;
389 m_line_start_idx = 0;
390 m_line_num = 0;
391 m_line_record.truncate (0);
392 m_use_count = 0;
393 m_total_lines = 0;
394 m_missing_trailing_newline = true;
397 /* Return the file cache that has been less used, recently, or the
398 first empty one. If HIGHEST_USE_COUNT is non-null,
399 *HIGHEST_USE_COUNT is set to the highest use count of the entries
400 in the cache table. */
402 file_cache_slot*
403 file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
405 diagnostic_file_cache_init ();
407 file_cache_slot *to_evict = &m_file_slots[0];
408 unsigned huc = to_evict->get_use_count ();
409 for (unsigned i = 1; i < num_file_slots; ++i)
411 file_cache_slot *c = &m_file_slots[i];
412 bool c_is_empty = (c->get_file_path () == NULL);
414 if (c->get_use_count () < to_evict->get_use_count ()
415 || (to_evict->get_file_path () && c_is_empty))
416 /* We evict C because it's either an entry with a lower use
417 count or one that is empty. */
418 to_evict = c;
420 if (huc < c->get_use_count ())
421 huc = c->get_use_count ();
423 if (c_is_empty)
424 /* We've reached the end of the cache; subsequent elements are
425 all empty. */
426 break;
429 if (highest_use_count)
430 *highest_use_count = huc;
432 return to_evict;
435 /* Create the cache used for the content of a given file to be
436 accessed by caret diagnostic. This cache is added to an array of
437 cache and can be retrieved by lookup_file_in_cache_tab. This
438 function returns the created cache. Note that only the last
439 num_file_slots files are cached. */
441 file_cache_slot*
442 file_cache::add_file (const char *file_path)
445 FILE *fp = fopen (file_path, "r");
446 if (fp == NULL)
447 return NULL;
449 unsigned highest_use_count = 0;
450 file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
451 if (!r->create (in_context, file_path, fp, highest_use_count))
452 return NULL;
453 return r;
456 /* Populate this slot for use on FILE_PATH and FP, dropping any
457 existing cached content within it. */
459 bool
460 file_cache_slot::create (const file_cache::input_context &in_context,
461 const char *file_path, FILE *fp,
462 unsigned highest_use_count)
464 m_file_path = file_path;
465 if (m_fp)
466 fclose (m_fp);
467 m_fp = fp;
468 if (m_alloc_offset)
469 offset_buffer (-m_alloc_offset);
470 m_nb_read = 0;
471 m_line_start_idx = 0;
472 m_line_num = 0;
473 m_line_record.truncate (0);
474 /* Ensure that this cache entry doesn't get evicted next time
475 add_file_to_cache_tab is called. */
476 m_use_count = ++highest_use_count;
477 m_total_lines = total_lines_num (file_path);
478 m_missing_trailing_newline = true;
481 /* Check the input configuration to determine if we need to do any
482 transformations, such as charset conversion or BOM skipping. */
483 if (const char *input_charset = in_context.ccb (file_path))
485 /* Need a full-blown conversion of the input charset. */
486 fclose (m_fp);
487 m_fp = NULL;
488 const cpp_converted_source cs
489 = cpp_get_converted_source (file_path, input_charset);
490 if (!cs.data)
491 return false;
492 if (m_data)
493 XDELETEVEC (m_data);
494 m_data = cs.data;
495 m_nb_read = m_size = cs.len;
496 m_alloc_offset = cs.data - cs.to_free;
498 else if (in_context.should_skip_bom)
500 if (read_data ())
502 const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
503 offset_buffer (offset);
504 m_nb_read -= offset;
508 return true;
511 /* file_cache's ctor. */
513 file_cache::file_cache ()
514 : m_file_slots (new file_cache_slot[num_file_slots])
516 initialize_input_context (nullptr, false);
519 /* file_cache's dtor. */
521 file_cache::~file_cache ()
523 delete[] m_file_slots;
526 /* Lookup the cache used for the content of a given file accessed by
527 caret diagnostic. If no cached file was found, create a new cache
528 for this file, add it to the array of cached file and return
529 it. */
531 file_cache_slot*
532 file_cache::lookup_or_add_file (const char *file_path)
534 file_cache_slot *r = lookup_file (file_path);
535 if (r == NULL)
536 r = add_file (file_path);
537 return r;
540 /* Default constructor for a cache of file used by caret
541 diagnostic. */
543 file_cache_slot::file_cache_slot ()
544 : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
545 m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
546 m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true)
548 m_line_record.create (0);
551 /* Destructor for a cache of file used by caret diagnostic. */
553 file_cache_slot::~file_cache_slot ()
555 if (m_fp)
557 fclose (m_fp);
558 m_fp = NULL;
560 if (m_data)
562 offset_buffer (-m_alloc_offset);
563 XDELETEVEC (m_data);
564 m_data = 0;
566 m_line_record.release ();
569 /* Returns TRUE iff the cache would need to be filled with data coming
570 from the file. That is, either the cache is empty or full or the
571 current line is empty. Note that if the cache is full, it would
572 need to be extended and filled again. */
574 bool
575 file_cache_slot::needs_read_p () const
577 return m_fp && (m_nb_read == 0
578 || m_nb_read == m_size
579 || (m_line_start_idx >= m_nb_read - 1));
582 /* Return TRUE iff the cache is full and thus needs to be
583 extended. */
585 bool
586 file_cache_slot::needs_grow_p () const
588 return m_nb_read == m_size;
591 /* Grow the cache if it needs to be extended. */
593 void
594 file_cache_slot::maybe_grow ()
596 if (!needs_grow_p ())
597 return;
599 if (!m_data)
601 gcc_assert (m_size == 0 && m_alloc_offset == 0);
602 m_size = buffer_size;
603 m_data = XNEWVEC (char, m_size);
605 else
607 const int offset = m_alloc_offset;
608 offset_buffer (-offset);
609 m_size *= 2;
610 m_data = XRESIZEVEC (char, m_data, m_size);
611 offset_buffer (offset);
615 /* Read more data into the cache. Extends the cache if need be.
616 Returns TRUE iff new data could be read. */
618 bool
619 file_cache_slot::read_data ()
621 if (feof (m_fp) || ferror (m_fp))
622 return false;
624 maybe_grow ();
626 char * from = m_data + m_nb_read;
627 size_t to_read = m_size - m_nb_read;
628 size_t nb_read = fread (from, 1, to_read, m_fp);
630 if (ferror (m_fp))
631 return false;
633 m_nb_read += nb_read;
634 return !!nb_read;
637 /* Read new data iff the cache needs to be filled with more data
638 coming from the file FP. Return TRUE iff the cache was filled with
639 mode data. */
641 bool
642 file_cache_slot::maybe_read_data ()
644 if (!needs_read_p ())
645 return false;
646 return read_data ();
/* Helper function for file_cache_slot::get_next_line (), to find the end
   of the next line within the LEN bytes starting at S.  Follows the
   memchr convention: the result points at the last byte of the line
   terminator, or is nullptr if no terminator was found.  Line endings
   are determined in the same manner that libcpp does: any of \n, \r\n,
   or \r is a line ending.  */

static char *
find_end_of_line (char *s, size_t len)
{
  char *const limit = s + len;
  for (char *p = s; p != limit; ++p)
    switch (*p)
      {
      case '\n':
        return p;

      case '\r':
        /* A \r that is the very last character of the buffer is not
           reported: we do not know if it's the end of the file or just
           the end of what has been read so far, and we wouldn't want
           to break in the middle of what's actually a \r\n sequence.
           The caller handles the case of a file ending in a \r.  */
        if (p + 1 == limit)
          return nullptr;
        return p[1] == '\n' ? p + 1 : p;

      default:
        break;
      }

  return nullptr;
}
680 /* Read a new line from file FP, using C as a cache for the data
681 coming from the file. Upon successful completion, *LINE is set to
682 the beginning of the line found. *LINE points directly in the
683 line cache and is only valid until the next call of get_next_line.
684 *LINE_LEN is set to the length of the line. Note that the line
685 does not contain any terminal delimiter. This function returns
686 true if some data was read or process from the cache, false
687 otherwise. Note that subsequent calls to get_next_line might
688 make the content of *LINE invalid. */
690 bool
691 file_cache_slot::get_next_line (char **line, ssize_t *line_len)
693 /* Fill the cache with data to process. */
694 maybe_read_data ();
696 size_t remaining_size = m_nb_read - m_line_start_idx;
697 if (remaining_size == 0)
698 /* There is no more data to process. */
699 return false;
701 char *line_start = m_data + m_line_start_idx;
703 char *next_line_start = NULL;
704 size_t len = 0;
705 char *line_end = find_end_of_line (line_start, remaining_size);
706 if (line_end == NULL)
708 /* We haven't found an end-of-line delimiter in the cache.
709 Fill the cache with more data from the file and look again. */
710 while (maybe_read_data ())
712 line_start = m_data + m_line_start_idx;
713 remaining_size = m_nb_read - m_line_start_idx;
714 line_end = find_end_of_line (line_start, remaining_size);
715 if (line_end != NULL)
717 next_line_start = line_end + 1;
718 break;
721 if (line_end == NULL)
723 /* We've loaded all the file into the cache and still no
724 terminator. Let's say the line ends up at one byte past the
725 end of the file. This is to stay consistent with the case
726 of when the line ends up with a terminator and line_end points to
727 that. That consistency is useful below in the len calculation.
729 If the file ends in a \r, we didn't identify it as a line
730 terminator above, so do that now instead. */
731 line_end = m_data + m_nb_read;
732 if (m_nb_read && line_end[-1] == '\r')
734 --line_end;
735 m_missing_trailing_newline = false;
737 else
738 m_missing_trailing_newline = true;
740 else
741 m_missing_trailing_newline = false;
743 else
745 next_line_start = line_end + 1;
746 m_missing_trailing_newline = false;
749 if (m_fp && ferror (m_fp))
750 return false;
752 /* At this point, we've found the end of the of line. It either points to
753 the line terminator or to one byte after the last byte of the file. */
754 gcc_assert (line_end != NULL);
756 len = line_end - line_start;
758 if (m_line_start_idx < m_nb_read)
759 *line = line_start;
761 ++m_line_num;
763 /* Before we update our line record, make sure the hint about the
764 total number of lines of the file is correct. If it's not, then
765 we give up recording line boundaries from now on. */
766 bool update_line_record = true;
767 if (m_line_num > m_total_lines)
768 update_line_record = false;
770 /* Now update our line record so that re-reading lines from the
771 before m_line_start_idx is faster. */
772 if (update_line_record
773 && m_line_record.length () < line_record_size)
775 /* If the file lines fits in the line record, we just record all
776 its lines ...*/
777 if (m_total_lines <= line_record_size
778 && m_line_num > m_line_record.length ())
779 m_line_record.safe_push
780 (file_cache_slot::line_info (m_line_num,
781 m_line_start_idx,
782 line_end - m_data));
783 else if (m_total_lines > line_record_size)
785 /* ... otherwise, we just scale total_lines down to
786 (line_record_size lines. */
787 size_t n = (m_line_num * line_record_size) / m_total_lines;
788 if (m_line_record.length () == 0
789 || n >= m_line_record.length ())
790 m_line_record.safe_push
791 (file_cache_slot::line_info (m_line_num,
792 m_line_start_idx,
793 line_end - m_data));
797 /* Update m_line_start_idx so that it points to the next line to be
798 read. */
799 if (next_line_start)
800 m_line_start_idx = next_line_start - m_data;
801 else
802 /* We didn't find any terminal '\n'. Let's consider that the end
803 of line is the end of the data in the cache. The next
804 invocation of get_next_line will either read more data from the
805 underlying file or return false early because we've reached the
806 end of the file. */
807 m_line_start_idx = m_nb_read;
809 *line_len = len;
811 return true;
814 /* Consume the next bytes coming from the cache (or from its
815 underlying file if there are remaining unread bytes in the file)
816 until we reach the next end-of-line (or end-of-file). There is no
817 copying from the cache involved. Return TRUE upon successful
818 completion. */
820 bool
821 file_cache_slot::goto_next_line ()
823 char *l;
824 ssize_t len;
826 return get_next_line (&l, &len);
829 /* Read an arbitrary line number LINE_NUM from the file cached in C.
830 If the line was read successfully, *LINE points to the beginning
831 of the line in the file cache and *LINE_LEN is the length of the
832 line. *LINE is not nul-terminated, but may contain zero bytes.
833 *LINE is only valid until the next call of read_line_num.
834 This function returns bool if a line was read. */
836 bool
837 file_cache_slot::read_line_num (size_t line_num,
838 char ** line, ssize_t *line_len)
840 gcc_assert (line_num > 0);
842 if (line_num <= m_line_num)
844 /* We've been asked to read lines that are before m_line_num.
845 So lets use our line record (if it's not empty) to try to
846 avoid re-reading the file from the beginning again. */
848 if (m_line_record.is_empty ())
850 m_line_start_idx = 0;
851 m_line_num = 0;
853 else
855 file_cache_slot::line_info *i = NULL;
856 if (m_total_lines <= line_record_size)
858 /* In languages where the input file is not totally
859 preprocessed up front, the m_total_lines hint
860 can be smaller than the number of lines of the
861 file. In that case, only the first
862 m_total_lines have been recorded.
864 Otherwise, the first m_total_lines we've read have
865 their start/end recorded here. */
866 i = (line_num <= m_total_lines)
867 ? &m_line_record[line_num - 1]
868 : &m_line_record[m_total_lines - 1];
869 gcc_assert (i->line_num <= line_num);
871 else
873 /* So the file had more lines than our line record
874 size. Thus the number of lines we've recorded has
875 been scaled down to line_record_size. Let's
876 pick the start/end of the recorded line that is
877 closest to line_num. */
878 size_t n = (line_num <= m_total_lines)
879 ? line_num * line_record_size / m_total_lines
880 : m_line_record.length () - 1;
881 if (n < m_line_record.length ())
883 i = &m_line_record[n];
884 gcc_assert (i->line_num <= line_num);
888 if (i && i->line_num == line_num)
890 /* We have the start/end of the line. */
891 *line = m_data + i->start_pos;
892 *line_len = i->end_pos - i->start_pos;
893 return true;
896 if (i)
898 m_line_start_idx = i->start_pos;
899 m_line_num = i->line_num - 1;
901 else
903 m_line_start_idx = 0;
904 m_line_num = 0;
909 /* Let's walk from line m_line_num up to line_num - 1, without
910 copying any line. */
911 while (m_line_num < line_num - 1)
912 if (!goto_next_line ())
913 return false;
915 /* The line we want is the next one. Let's read and copy it back to
916 the caller. */
917 return get_next_line (line, line_len);
920 /* Return the physical source line that corresponds to FILE_PATH/LINE.
921 The line is not nul-terminated. The returned pointer is only
922 valid until the next call of location_get_source_line.
923 Note that the line can contain several null characters,
924 so the returned value's length has the actual length of the line.
925 If the function fails, a NULL char_span is returned. */
927 char_span
928 location_get_source_line (const char *file_path, int line)
930 char *buffer = NULL;
931 ssize_t len;
933 if (line == 0)
934 return char_span (NULL, 0);
936 if (file_path == NULL)
937 return char_span (NULL, 0);
939 diagnostic_file_cache_init ();
941 file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
942 if (c == NULL)
943 return char_span (NULL, 0);
945 bool read = c->read_line_num (line, &buffer, &len);
946 if (!read)
947 return char_span (NULL, 0);
949 return char_span (buffer, len);
952 /* Determine if FILE_PATH missing a trailing newline on its final line.
953 Only valid to call once all of the file has been loaded, by
954 requesting a line number beyond the end of the file. */
956 bool
957 location_missing_trailing_newline (const char *file_path)
959 diagnostic_file_cache_init ();
961 file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
962 if (c == NULL)
963 return false;
965 return c->missing_trailing_newline_p ();
968 /* Test if the location originates from the spelling location of a
969 builtin-tokens. That is, return TRUE if LOC is a (possibly
970 virtual) location of a built-in token that appears in the expansion
971 list of a macro. Please note that this function also works on
972 tokens that result from built-in tokens. For instance, the
973 function would return true if passed a token "4" that is the result
974 of the expansion of the built-in __LINE__ macro. */
975 bool
976 is_location_from_builtin_token (location_t loc)
978 const line_map_ordinary *map = NULL;
979 loc = linemap_resolve_location (line_table, loc,
980 LRK_SPELLING_LOCATION, &map);
981 return loc == BUILTINS_LOCATION;
984 /* Expand the source location LOC into a human readable location. If
985 LOC is virtual, it resolves to the expansion point of the involved
986 macro. If LOC resolves to a builtin location, the file name of the
987 readable location is set to the string "<built-in>". */
989 expanded_location
990 expand_location (location_t loc)
992 return expand_location_1 (loc, /*expansion_point_p=*/true,
993 LOCATION_ASPECT_CARET);
996 /* Expand the source location LOC into a human readable location. If
997 LOC is virtual, it resolves to the expansion location of the
998 relevant macro. If LOC resolves to a builtin location, the file
999 name of the readable location is set to the string
1000 "<built-in>". */
1002 expanded_location
1003 expand_location_to_spelling_point (location_t loc,
1004 enum location_aspect aspect)
1006 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
1009 /* The rich_location class within libcpp requires a way to expand
1010 location_t instances, and relies on the client code
1011 providing a symbol named
1012 linemap_client_expand_location_to_spelling_point
1013 to do this.
1015 This is the implementation for libcommon.a (all host binaries),
1016 which simply calls into expand_location_1. */
1018 expanded_location
1019 linemap_client_expand_location_to_spelling_point (location_t loc,
1020 enum location_aspect aspect)
1022 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
1026 /* If LOCATION is in a system header and if it is a virtual location
1027 for a token coming from the expansion of a macro, unwind it to
1028 the location of the expansion point of the macro. If the expansion
1029 point is also in a system header return the original LOCATION.
1030 Otherwise, return the location of the expansion point.
1032 This is used for instance when we want to emit diagnostics about a
1033 token that may be located in a macro that is itself defined in a
1034 system header, for example, for the NULL macro. In such a case, if
1035 LOCATION were passed directly to diagnostic functions such as
1036 warning_at, the diagnostic would be suppressed (unless
1037 -Wsystem-headers). */
1039 location_t
1040 expansion_point_location_if_in_system_header (location_t location)
1042 if (!in_system_header_at (location))
1043 return location;
1045 location_t xloc = linemap_resolve_location (line_table, location,
1046 LRK_MACRO_EXPANSION_POINT,
1047 NULL);
1048 return in_system_header_at (xloc) ? location : xloc;
1051 /* If LOCATION is a virtual location for a token coming from the expansion
1052 of a macro, unwind to the location of the expansion point of the macro. */
1054 location_t
1055 expansion_point_location (location_t location)
1057 return linemap_resolve_location (line_table, location,
1058 LRK_MACRO_EXPANSION_POINT, NULL);
1061 /* Construct a location with caret at CARET, ranging from START to
1062 finish e.g.
1064 11111111112
1065 12345678901234567890
1067 523 return foo + bar;
1068 ~~~~^~~~~
1071 The location's caret is at the "+", line 523 column 15, but starts
1072 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
1073 of "bar" at column 19. */
1075 location_t
1076 make_location (location_t caret, location_t start, location_t finish)
1078 location_t pure_loc = get_pure_location (caret);
1079 source_range src_range;
1080 src_range.m_start = get_start (start);
1081 src_range.m_finish = get_finish (finish);
1082 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
1083 pure_loc,
1084 src_range,
1085 NULL,
1087 return combined_loc;
1090 /* Same as above, but taking a source range rather than two locations. */
1092 location_t
1093 make_location (location_t caret, source_range src_range)
1095 location_t pure_loc = get_pure_location (caret);
1096 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL, 0);
1099 /* An expanded_location stores the column in byte units. This function
1100 converts that column to display units. That requires reading the associated
1101 source line in order to calculate the display width. If that cannot be done
1102 for any reason, then returns the byte column as a fallback. */
1104 location_compute_display_column (expanded_location exploc,
1105 const cpp_char_column_policy &policy)
1107 if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
1108 return exploc.column;
1109 char_span line = location_get_source_line (exploc.file, exploc.line);
1110 /* If line is NULL, this function returns exploc.column which is the
1111 desired fallback. */
1112 return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
1113 exploc.column, policy);
1116 /* Dump statistics to stderr about the memory usage of the line_table
1117 set of line maps. This also displays some statistics about macro
1118 expansion. */
1120 void
1121 dump_line_table_statistics (void)
1123 struct linemap_stats s;
1124 long total_used_map_size,
1125 macro_maps_size,
1126 total_allocated_map_size;
1128 memset (&s, 0, sizeof (s));
1130 linemap_get_statistics (line_table, &s);
1132 macro_maps_size = s.macro_maps_used_size
1133 + s.macro_maps_locations_size;
1135 total_allocated_map_size = s.ordinary_maps_allocated_size
1136 + s.macro_maps_allocated_size
1137 + s.macro_maps_locations_size;
1139 total_used_map_size = s.ordinary_maps_used_size
1140 + s.macro_maps_used_size
1141 + s.macro_maps_locations_size;
1143 fprintf (stderr, "Number of expanded macros: %5ld\n",
1144 s.num_expanded_macros);
1145 if (s.num_expanded_macros != 0)
1146 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
1147 s.num_macro_tokens / s.num_expanded_macros);
1148 fprintf (stderr,
1149 "\nLine Table allocations during the "
1150 "compilation process\n");
1151 fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
1152 SIZE_AMOUNT (s.num_ordinary_maps_used));
1153 fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
1154 SIZE_AMOUNT (s.ordinary_maps_used_size));
1155 fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
1156 SIZE_AMOUNT (s.num_ordinary_maps_allocated));
1157 fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
1158 SIZE_AMOUNT (s.ordinary_maps_allocated_size));
1159 fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
1160 SIZE_AMOUNT (s.num_macro_maps_used));
1161 fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
1162 SIZE_AMOUNT (s.macro_maps_used_size));
1163 fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
1164 SIZE_AMOUNT (s.macro_maps_locations_size));
1165 fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
1166 SIZE_AMOUNT (macro_maps_size));
1167 fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
1168 SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
1169 fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
1170 SIZE_AMOUNT (total_allocated_map_size));
1171 fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
1172 SIZE_AMOUNT (total_used_map_size));
1173 fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
1174 SIZE_AMOUNT (s.adhoc_table_size));
1175 fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
1176 SIZE_AMOUNT (s.adhoc_table_entries_used));
1177 fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
1178 SIZE_AMOUNT (line_table->num_optimized_ranges));
1179 fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
1180 SIZE_AMOUNT (line_table->num_unoptimized_ranges));
1182 fprintf (stderr, "\n");
1185 /* Get location one beyond the final location in ordinary map IDX. */
1187 static location_t
1188 get_end_location (class line_maps *set, unsigned int idx)
1190 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1191 return set->highest_location;
1193 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1194 return MAP_START_LOCATION (next_map);
/* Helper function for write_digit_row.
   Write the decimal units digit of DIGIT (i.e. DIGIT % 10) to STREAM
   as a single character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1205 /* Helper function for dump_location_info.
1206 Write a row of numbers to STREAM, numbering a source line,
1207 giving the units, tens, hundreds etc of the column number. */
1209 static void
1210 write_digit_row (FILE *stream, int indent,
1211 const line_map_ordinary *map,
1212 location_t loc, int max_col, int divisor)
1214 fprintf (stream, "%*c", indent, ' ');
1215 fprintf (stream, "|");
1216 for (int column = 1; column < max_col; column++)
1218 location_t column_loc = loc + (column << map->m_range_bits);
1219 write_digit (stream, column_loc / divisor);
1221 fprintf (stream, "\n");
1224 /* Write a half-closed (START) / half-open (END) interval of
1225 location_t to STREAM. */
1227 static void
1228 dump_location_range (FILE *stream,
1229 location_t start, location_t end)
1231 fprintf (stream,
1232 " location_t interval: %u <= loc < %u\n",
1233 start, end);
1236 /* Write a labelled description of a half-closed (START) / half-open (END)
1237 interval of location_t to STREAM. */
1239 static void
1240 dump_labelled_location_range (FILE *stream,
1241 const char *name,
1242 location_t start, location_t end)
1244 fprintf (stream, "%s\n", name);
1245 dump_location_range (stream, start, end);
1246 fprintf (stream, "\n");
1249 /* Write a visualization of the locations in the line_table to STREAM. */
1251 void
1252 dump_location_info (FILE *stream)
1254 /* Visualize the reserved locations. */
1255 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1256 0, RESERVED_LOCATION_COUNT);
1258 /* Visualize the ordinary line_map instances, rendering the sources. */
1259 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1261 location_t end_location = get_end_location (line_table, idx);
1262 /* half-closed: doesn't include this one. */
1264 const line_map_ordinary *map
1265 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1266 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1267 dump_location_range (stream,
1268 MAP_START_LOCATION (map), end_location);
1269 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1270 fprintf (stream, " starting at line: %i\n",
1271 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1272 fprintf (stream, " column and range bits: %i\n",
1273 map->m_column_and_range_bits);
1274 fprintf (stream, " column bits: %i\n",
1275 map->m_column_and_range_bits - map->m_range_bits);
1276 fprintf (stream, " range bits: %i\n",
1277 map->m_range_bits);
1278 const char * reason;
1279 switch (map->reason) {
1280 case LC_ENTER:
1281 reason = "LC_ENTER";
1282 break;
1283 case LC_LEAVE:
1284 reason = "LC_LEAVE";
1285 break;
1286 case LC_RENAME:
1287 reason = "LC_RENAME";
1288 break;
1289 case LC_RENAME_VERBATIM:
1290 reason = "LC_RENAME_VERBATIM";
1291 break;
1292 case LC_ENTER_MACRO:
1293 reason = "LC_RENAME_MACRO";
1294 break;
1295 default:
1296 reason = "Unknown";
1298 fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
1300 const line_map_ordinary *includer_map
1301 = linemap_included_from_linemap (line_table, map);
1302 fprintf (stream, " included from location: %d",
1303 linemap_included_from (map));
1304 if (includer_map) {
1305 fprintf (stream, " (in ordinary map %d)",
1306 int (includer_map - line_table->info_ordinary.maps));
1308 fprintf (stream, "\n");
1310 /* Render the span of source lines that this "map" covers. */
1311 for (location_t loc = MAP_START_LOCATION (map);
1312 loc < end_location;
1313 loc += (1 << map->m_range_bits) )
1315 gcc_assert (pure_location_p (line_table, loc) );
1317 expanded_location exploc
1318 = linemap_expand_location (line_table, map, loc);
1320 if (exploc.column == 0)
1322 /* Beginning of a new source line: draw the line. */
1324 char_span line_text = location_get_source_line (exploc.file,
1325 exploc.line);
1326 if (!line_text)
1327 break;
1328 fprintf (stream,
1329 "%s:%3i|loc:%5i|%.*s\n",
1330 exploc.file, exploc.line,
1331 loc,
1332 (int)line_text.length (), line_text.get_buffer ());
1334 /* "loc" is at column 0, which means "the whole line".
1335 Render the locations *within* the line, by underlining
1336 it, showing the location_t numeric values
1337 at each column. */
1338 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1339 if (max_col > line_text.length ())
1340 max_col = line_text.length () + 1;
1342 int len_lnum = num_digits (exploc.line);
1343 if (len_lnum < 3)
1344 len_lnum = 3;
1345 int len_loc = num_digits (loc);
1346 if (len_loc < 5)
1347 len_loc = 5;
1349 int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1351 /* Thousands. */
1352 if (end_location > 999)
1353 write_digit_row (stream, indent, map, loc, max_col, 1000);
1355 /* Hundreds. */
1356 if (end_location > 99)
1357 write_digit_row (stream, indent, map, loc, max_col, 100);
1359 /* Tens. */
1360 write_digit_row (stream, indent, map, loc, max_col, 10);
1362 /* Units. */
1363 write_digit_row (stream, indent, map, loc, max_col, 1);
1366 fprintf (stream, "\n");
1369 /* Visualize unallocated values. */
1370 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1371 line_table->highest_location,
1372 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1374 /* Visualize the macro line_map instances, rendering the sources. */
1375 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1377 /* Each macro map that is allocated owns location_t values
1378 that are *lower* that the one before them.
1379 Hence it's meaningful to view them either in order of ascending
1380 source locations, or in order of ascending macro map index. */
1381 const bool ascending_location_ts = true;
1382 unsigned int idx = (ascending_location_ts
1383 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1384 : i);
1385 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1386 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1387 idx,
1388 linemap_map_get_macro_name (map),
1389 MACRO_MAP_NUM_MACRO_TOKENS (map));
1390 dump_location_range (stream,
1391 map->start_location,
1392 (map->start_location
1393 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1394 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1395 "expansion point is location %i",
1396 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1397 fprintf (stream, " map->start_location: %u\n",
1398 map->start_location);
1400 fprintf (stream, " macro_locations:\n");
1401 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1403 location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1404 location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1406 /* linemap_add_macro_token encodes token numbers in an expansion
1407 by putting them after MAP_START_LOCATION. */
1409 /* I'm typically seeing 4 uninitialized entries at the end of
1410 0xafafafaf.
1411 This appears to be due to macro.cc:replace_args
1412 adding 2 extra args for padding tokens; presumably there may
1413 be a leading and/or trailing padding token injected,
1414 each for 2 more location slots.
1415 This would explain there being up to 4 location_ts slots
1416 that may be uninitialized. */
1418 fprintf (stream, " %u: %u, %u\n",
1422 if (x == y)
1424 if (x < MAP_START_LOCATION (map))
1425 inform (x, "token %u has %<x-location == y-location == %u%>",
1426 i, x);
1427 else
1428 fprintf (stream,
1429 "x-location == y-location == %u encodes token # %u\n",
1430 x, x - MAP_START_LOCATION (map));
1432 else
1434 inform (x, "token %u has %<x-location == %u%>", i, x);
1435 inform (x, "token %u has %<y-location == %u%>", i, y);
1438 fprintf (stream, "\n");
1441 /* It appears that MAX_LOCATION_T itself is never assigned to a
1442 macro map, presumably due to an off-by-one error somewhere
1443 between the logic in linemap_enter_macro and
1444 LINEMAPS_MACRO_LOWEST_LOCATION. */
1445 dump_labelled_location_range (stream, "MAX_LOCATION_T",
1446 MAX_LOCATION_T,
1447 MAX_LOCATION_T + 1);
1449 /* Visualize ad-hoc values. */
1450 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1451 MAX_LOCATION_T + 1, UINT_MAX);
1454 /* string_concat's constructor. */
1456 string_concat::string_concat (int num, location_t *locs)
1457 : m_num (num)
1459 m_locs = ggc_vec_alloc <location_t> (num);
1460 for (int i = 0; i < num; i++)
1461 m_locs[i] = locs[i];
1464 /* string_concat_db's constructor. */
1466 string_concat_db::string_concat_db ()
1468 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1471 /* Record that a string concatenation occurred, covering NUM
1472 string literal tokens. LOCS is an array of size NUM, containing the
1473 locations of the tokens. A copy of LOCS is taken. */
1475 void
1476 string_concat_db::record_string_concatenation (int num, location_t *locs)
1478 gcc_assert (num > 1);
1479 gcc_assert (locs);
1481 location_t key_loc = get_key_loc (locs[0]);
1482 /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:
1483 any data now recorded under key 'key_loc' would be overwritten by a
1484 subsequent call with the same key 'key_loc'. */
1485 if (RESERVED_LOCATION_P (key_loc))
1486 return;
1488 string_concat *concat
1489 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1490 m_table->put (key_loc, concat);
1493 /* Determine if LOC was the location of the initial token of a
1494 concatenation of string literal tokens.
1495 If so, *OUT_NUM is written to with the number of tokens, and
1496 *OUT_LOCS with the location of an array of locations of the
1497 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1498 storage owned by the string_concat_db.
1499 Otherwise, return false. */
1501 bool
1502 string_concat_db::get_string_concatenation (location_t loc,
1503 int *out_num,
1504 location_t **out_locs)
1506 gcc_assert (out_num);
1507 gcc_assert (out_locs);
1509 location_t key_loc = get_key_loc (loc);
1510 /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see
1511 discussion in 'string_concat_db::record_string_concatenation'. */
1512 if (RESERVED_LOCATION_P (key_loc))
1513 return false;
1515 string_concat **concat = m_table->get (key_loc);
1516 if (!concat)
1517 return false;
1519 *out_num = (*concat)->m_num;
1520 *out_locs =(*concat)->m_locs;
1521 return true;
1524 /* Internal function. Canonicalize LOC into a form suitable for
1525 use as a key within the database, stripping away macro expansion,
1526 ad-hoc information, and range information, using the location of
1527 the start of LOC within an ordinary linemap. */
1529 location_t
1530 string_concat_db::get_key_loc (location_t loc)
1532 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1533 NULL);
1535 loc = get_range_from_loc (line_table, loc).m_start;
1537 return loc;
1540 /* Helper class for use within get_substring_ranges_for_loc.
1541 An vec of cpp_string with responsibility for releasing all of the
1542 str->text for each str in the vector. */
1544 class auto_cpp_string_vec : public auto_vec <cpp_string>
1546 public:
1547 auto_cpp_string_vec (int alloc)
1548 : auto_vec <cpp_string> (alloc) {}
1550 ~auto_cpp_string_vec ()
1552 /* Clean up the copies within this vec. */
1553 int i;
1554 cpp_string *str;
1555 FOR_EACH_VEC_ELT (*this, i, str)
1556 free (const_cast <unsigned char *> (str->text));
1560 /* Attempt to populate RANGES with source location information on the
1561 individual characters within the string literal found at STRLOC.
1562 If CONCATS is non-NULL, then any string literals that the token at
1563 STRLOC was concatenated with are also added to RANGES.
1565 Return NULL if successful, or an error message if any errors occurred (in
1566 which case RANGES may be only partially populated and should not
1567 be used).
1569 This is implemented by re-parsing the relevant source line(s). */
1571 static const char *
1572 get_substring_ranges_for_loc (cpp_reader *pfile,
1573 string_concat_db *concats,
1574 location_t strloc,
1575 enum cpp_ttype type,
1576 cpp_substring_ranges &ranges)
1578 gcc_assert (pfile);
1580 if (strloc == UNKNOWN_LOCATION)
1581 return "unknown location";
1583 /* Reparsing the strings requires accurate location information.
1584 If -ftrack-macro-expansion has been overridden from its default
1585 of 2, then we might have a location of a macro expansion point,
1586 rather than the location of the literal itself.
1587 Avoid this by requiring that we have full macro expansion tracking
1588 for substring locations to be available. */
1589 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1590 return "track_macro_expansion != 2";
1592 /* If #line or # 44 "file"-style directives are present, then there's
1593 no guarantee that the line numbers we have can be used to locate
1594 the strings. For example, we might have a .i file with # directives
1595 pointing back to lines within a .c file, but the .c file might
1596 have been edited since the .i file was created.
1597 In such a case, the safest course is to disable on-demand substring
1598 locations. */
1599 if (line_table->seen_line_directive)
1600 return "seen line directive";
1602 /* If string concatenation has occurred at STRLOC, get the locations
1603 of all of the literal tokens making up the compound string.
1604 Otherwise, just use STRLOC. */
1605 int num_locs = 1;
1606 location_t *strlocs = &strloc;
1607 if (concats)
1608 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1610 auto_cpp_string_vec strs (num_locs);
1611 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1612 for (int i = 0; i < num_locs; i++)
1614 /* Get range of strloc. We will use it to locate the start and finish
1615 of the literal token within the line. */
1616 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1618 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1620 /* If the string token was within a macro expansion, then we can
1621 cope with it for the simple case where we have a single token.
1622 Otherwise, bail out. */
1623 if (src_range.m_start != src_range.m_finish)
1624 return "macro expansion";
1626 else
1628 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1629 /* If so, we can't reliably determine where the token started within
1630 its line. */
1631 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1633 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1634 /* If so, we can't reliably determine where the token finished
1635 within its line. */
1636 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1639 expanded_location start
1640 = expand_location_to_spelling_point (src_range.m_start,
1641 LOCATION_ASPECT_START);
1642 expanded_location finish
1643 = expand_location_to_spelling_point (src_range.m_finish,
1644 LOCATION_ASPECT_FINISH);
1645 if (start.file != finish.file)
1646 return "range endpoints are in different files";
1647 if (start.line != finish.line)
1648 return "range endpoints are on different lines";
1649 if (start.column > finish.column)
1650 return "range endpoints are reversed";
1652 char_span line = location_get_source_line (start.file, start.line);
1653 if (!line)
1654 return "unable to read source line";
1656 /* Determine the location of the literal (including quotes
1657 and leading prefix chars, such as the 'u' in a u""
1658 token). */
1659 size_t literal_length = finish.column - start.column + 1;
1661 /* Ensure that we don't crash if we got the wrong location. */
1662 if (start.column < 1)
1663 return "zero start column";
1664 if (line.length () < (start.column - 1 + literal_length))
1665 return "line is not wide enough";
1667 char_span literal = line.subspan (start.column - 1, literal_length);
1669 cpp_string from;
1670 from.len = literal_length;
1671 /* Make a copy of the literal, to avoid having to rely on
1672 the lifetime of the copy of the line within the cache.
1673 This will be released by the auto_cpp_string_vec dtor. */
1674 from.text = (unsigned char *)literal.xstrdup ();
1675 strs.safe_push (from);
1677 /* For very long lines, a new linemap could have started
1678 halfway through the token.
1679 Ensure that the loc_reader uses the linemap of the
1680 *end* of the token for its start location. */
1681 const line_map_ordinary *start_ord_map;
1682 linemap_resolve_location (line_table, src_range.m_start,
1683 LRK_SPELLING_LOCATION, &start_ord_map);
1684 const line_map_ordinary *final_ord_map;
1685 linemap_resolve_location (line_table, src_range.m_finish,
1686 LRK_SPELLING_LOCATION, &final_ord_map);
1687 if (start_ord_map == NULL || final_ord_map == NULL)
1688 return "failed to get ordinary maps";
1689 /* Bulletproofing. We ought to only have different ordinary maps
1690 for start vs finish due to line-length jumps. */
1691 if (start_ord_map != final_ord_map
1692 && start_ord_map->to_file != final_ord_map->to_file)
1693 return "start and finish are spelled in different ordinary maps";
1694 /* The file from linemap_resolve_location ought to match that from
1695 expand_location_to_spelling_point. */
1696 if (start_ord_map->to_file != start.file)
1697 return "mismatching file after resolving linemap";
1699 location_t start_loc
1700 = linemap_position_for_line_and_column (line_table, final_ord_map,
1701 start.line, start.column);
1703 cpp_string_location_reader loc_reader (start_loc, line_table);
1704 loc_readers.safe_push (loc_reader);
1707 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1708 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1709 loc_readers.address (),
1710 num_locs, &ranges, type);
1711 if (err)
1712 return err;
1714 /* Success: "ranges" should now contain information on the string. */
1715 return NULL;
1718 /* Attempt to populate *OUT_LOC with source location information on the
1719 given characters within the string literal found at STRLOC.
1720 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1721 character set.
1723 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1724 and string literal "012345\n789"
1725 *OUT_LOC is written to with:
1726 "012345\n789"
1727 ~^~~~~
1729 If CONCATS is non-NULL, then any string literals that the token at
1730 STRLOC was concatenated with are also considered.
1732 This is implemented by re-parsing the relevant source line(s).
1734 Return NULL if successful, or an error message if any errors occurred.
1735 Error messages are intended for GCC developers (to help debugging) rather
1736 than for end-users. */
1738 const char *
1739 get_location_within_string (cpp_reader *pfile,
1740 string_concat_db *concats,
1741 location_t strloc,
1742 enum cpp_ttype type,
1743 int caret_idx, int start_idx, int end_idx,
1744 location_t *out_loc)
1746 gcc_checking_assert (caret_idx >= 0);
1747 gcc_checking_assert (start_idx >= 0);
1748 gcc_checking_assert (end_idx >= 0);
1749 gcc_assert (out_loc);
1751 cpp_substring_ranges ranges;
1752 const char *err
1753 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1754 if (err)
1755 return err;
1757 if (caret_idx >= ranges.get_num_ranges ())
1758 return "caret_idx out of range";
1759 if (start_idx >= ranges.get_num_ranges ())
1760 return "start_idx out of range";
1761 if (end_idx >= ranges.get_num_ranges ())
1762 return "end_idx out of range";
1764 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1765 ranges.get_range (start_idx).m_start,
1766 ranges.get_range (end_idx).m_finish);
1767 return NULL;
1770 /* Associate the DISCRIMINATOR with LOCUS, and return a new locus. */
1772 location_t
1773 location_with_discriminator (location_t locus, int discriminator)
1775 tree block = LOCATION_BLOCK (locus);
1776 source_range src_range = get_range_from_loc (line_table, locus);
1777 locus = get_pure_location (locus);
1779 if (locus == UNKNOWN_LOCATION)
1780 return locus;
1782 return COMBINE_LOCATION_DATA (line_table, locus, src_range, block, discriminator);
1785 /* Return TRUE if LOCUS represents a location with a discriminator. */
1787 bool
1788 has_discriminator (location_t locus)
1790 return get_discriminator_from_loc (locus) != 0;
1793 /* Return the discriminator for LOCUS. */
1796 get_discriminator_from_loc (location_t locus)
1798 return get_discriminator_from_loc (line_table, locus);
1801 #if CHECKING_P
1803 namespace selftest {
1805 /* Selftests of location handling. */
1807 /* Attempt to populate *OUT_RANGE with source location information on the
1808 given character within the string literal found at STRLOC.
1809 CHAR_IDX refers to an offset within the execution character set.
1810 If CONCATS is non-NULL, then any string literals that the token at
1811 STRLOC was concatenated with are also considered.
1813 This is implemented by re-parsing the relevant source line(s).
1815 Return NULL if successful, or an error message if any errors occurred.
1816 Error messages are intended for GCC developers (to help debugging) rather
1817 than for end-users. */
1819 static const char *
1820 get_source_range_for_char (cpp_reader *pfile,
1821 string_concat_db *concats,
1822 location_t strloc,
1823 enum cpp_ttype type,
1824 int char_idx,
1825 source_range *out_range)
1827 gcc_checking_assert (char_idx >= 0);
1828 gcc_assert (out_range);
1830 cpp_substring_ranges ranges;
1831 const char *err
1832 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1833 if (err)
1834 return err;
1836 if (char_idx >= ranges.get_num_ranges ())
1837 return "char_idx out of range";
1839 *out_range = ranges.get_range (char_idx);
1840 return NULL;
1843 /* As get_source_range_for_char, but write to *OUT the number
1844 of ranges that are available. */
1846 static const char *
1847 get_num_source_ranges_for_substring (cpp_reader *pfile,
1848 string_concat_db *concats,
1849 location_t strloc,
1850 enum cpp_ttype type,
1851 int *out)
1853 gcc_assert (out);
1855 cpp_substring_ranges ranges;
1856 const char *err
1857 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1859 if (err)
1860 return err;
1862 *out = ranges.get_num_ranges ();
1863 return NULL;
1866 /* Selftests of location handling. */
1868 /* Verify that compare() on linenum_type handles comparisons over the full
1869 range of the type. */
1871 static void
1872 test_linenum_comparisons ()
1874 linenum_type min_line (0);
1875 linenum_type max_line (0xffffffff);
1876 ASSERT_EQ (0, compare (min_line, min_line));
1877 ASSERT_EQ (0, compare (max_line, max_line));
1879 ASSERT_GT (compare (max_line, min_line), 0);
1880 ASSERT_LT (compare (min_line, max_line), 0);
1883 /* Helper function for verifying location data: when location_t
1884 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1885 as having column 0. */
1887 static bool
1888 should_have_column_data_p (location_t loc)
1890 if (IS_ADHOC_LOC (loc))
1891 loc = get_location_from_adhoc_loc (line_table, loc);
1892 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1893 return false;
1894 return true;
1897 /* Selftest for should_have_column_data_p. */
1899 static void
1900 test_should_have_column_data_p ()
1902 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1903 ASSERT_TRUE
1904 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1905 ASSERT_FALSE
1906 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1909 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1910 on LOC. */
1912 static void
1913 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1914 location_t loc)
1916 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1917 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1918 /* If location_t values are sufficiently high, then column numbers
1919 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1920 When close to the threshold, column numbers *may* be present: if
1921 the final linemap before the threshold contains a line that straddles
1922 the threshold, locations in that line have column information. */
1923 if (should_have_column_data_p (loc))
1924 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
/* Many selftests build a line table and one or more line maps inside
   it.

   To get the most coverage, those tests should be run in a variety of
   situations:
   - line_table->default_range_bits: some frontends use a non-zero
     value and others use zero
   - the fallback modes within line-map.cc: there are various threshold
     values of location_t beyond which line-map.cc changes behavior
     (disabling of the range-packing optimization, disabling of
     column-tracking).  These can be exercised by starting the
     line_table at interesting values at or near those thresholds.

   An instance of the class below describes one case of that test
   matrix.  */

class line_table_case
{
public:
  line_table_case (int default_range_bits, int base_location)
  : m_default_range_bits (default_range_bits),
    m_base_location (base_location)
  {
  }

  /* Value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* When non-zero, the value at which the line_table's highest_location
     and highest_line start.  */
  int m_base_location;
};
1955 /* Constructor. Store the old value of line_table, and create a new
1956 one, using sane defaults. */
1958 line_table_test::line_table_test ()
1960 gcc_assert (saved_line_table == NULL);
1961 saved_line_table = line_table;
1962 line_table = ggc_alloc<line_maps> ();
1963 linemap_init (line_table, BUILTINS_LOCATION);
1964 gcc_assert (saved_line_table->reallocator);
1965 line_table->reallocator = saved_line_table->reallocator;
1966 gcc_assert (saved_line_table->round_alloc_size);
1967 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1968 line_table->default_range_bits = 0;
1971 /* Constructor. Store the old value of line_table, and create a new
1972 one, using the sitation described in CASE_. */
1974 line_table_test::line_table_test (const line_table_case &case_)
1976 gcc_assert (saved_line_table == NULL);
1977 saved_line_table = line_table;
1978 line_table = ggc_alloc<line_maps> ();
1979 linemap_init (line_table, BUILTINS_LOCATION);
1980 gcc_assert (saved_line_table->reallocator);
1981 line_table->reallocator = saved_line_table->reallocator;
1982 gcc_assert (saved_line_table->round_alloc_size);
1983 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1984 line_table->default_range_bits = case_.m_default_range_bits;
1985 if (case_.m_base_location)
1987 line_table->highest_location = case_.m_base_location;
1988 line_table->highest_line = case_.m_base_location;
1992 /* Destructor. Restore the old value of line_table. */
1994 line_table_test::~line_table_test ()
1996 gcc_assert (saved_line_table != NULL);
1997 line_table = saved_line_table;
1998 saved_line_table = NULL;
2001 /* Verify basic operation of ordinary linemaps. */
2003 static void
2004 test_accessing_ordinary_linemaps (const line_table_case &case_)
2006 line_table_test ltt (case_);
2008 /* Build a simple linemap describing some locations. */
2009 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
2011 linemap_line_start (line_table, 1, 100);
2012 location_t loc_a = linemap_position_for_column (line_table, 1);
2013 location_t loc_b = linemap_position_for_column (line_table, 23);
2015 linemap_line_start (line_table, 2, 100);
2016 location_t loc_c = linemap_position_for_column (line_table, 1);
2017 location_t loc_d = linemap_position_for_column (line_table, 17);
2019 /* Example of a very long line. */
2020 linemap_line_start (line_table, 3, 2000);
2021 location_t loc_e = linemap_position_for_column (line_table, 700);
2023 /* Transitioning back to a short line. */
2024 linemap_line_start (line_table, 4, 0);
2025 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
2027 if (should_have_column_data_p (loc_back_to_short))
2029 /* Verify that we switched to short lines in the linemap. */
2030 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
2031 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
2034 /* Example of a line that will eventually be seen to be longer
2035 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
2036 below that. */
2037 linemap_line_start (line_table, 5, 2000);
2039 location_t loc_start_of_very_long_line
2040 = linemap_position_for_column (line_table, 2000);
2041 location_t loc_too_wide
2042 = linemap_position_for_column (line_table, 4097);
2043 location_t loc_too_wide_2
2044 = linemap_position_for_column (line_table, 4098);
2046 /* ...and back to a sane line length. */
2047 linemap_line_start (line_table, 6, 100);
2048 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
2050 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
2052 /* Multiple files. */
2053 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
2054 linemap_line_start (line_table, 1, 200);
2055 location_t loc_f = linemap_position_for_column (line_table, 150);
2056 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
2058 /* Verify that we can recover the location info. */
2059 assert_loceq ("foo.c", 1, 1, loc_a);
2060 assert_loceq ("foo.c", 1, 23, loc_b);
2061 assert_loceq ("foo.c", 2, 1, loc_c);
2062 assert_loceq ("foo.c", 2, 17, loc_d);
2063 assert_loceq ("foo.c", 3, 700, loc_e);
2064 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
2066 /* In the very wide line, the initial location should be fully tracked. */
2067 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
2068 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
2069 be disabled. */
2070 assert_loceq ("foo.c", 5, 0, loc_too_wide);
2071 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
2072 /*...and column-tracking should be re-enabled for subsequent lines. */
2073 assert_loceq ("foo.c", 6, 10, loc_sane_again);
2075 assert_loceq ("bar.c", 1, 150, loc_f);
2077 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
2078 ASSERT_TRUE (pure_location_p (line_table, loc_a));
2080 /* Verify using make_location to build a range, and extracting data
2081 back from it. */
2082 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
2083 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
2084 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
2085 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
2086 ASSERT_EQ (loc_b, src_range.m_start);
2087 ASSERT_EQ (loc_d, src_range.m_finish);
2090 /* Verify various properties of UNKNOWN_LOCATION. */
2092 static void
2093 test_unknown_location ()
2095 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
2096 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
2097 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
2100 /* Verify various properties of BUILTINS_LOCATION. */
2102 static void
2103 test_builtins ()
2105 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
2106 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
2109 /* Regression test for make_location.
2110 Ensure that we use pure locations for the start/finish of the range,
2111 rather than storing a packed or ad-hoc range as the start/finish. */
2113 static void
2114 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
2116 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
2117 with C++ frontend.
2118 ....................0000000001111111111222.
2119 ....................1234567890123456789012. */
2120 const char *content = " r += !aaa == bbb;\n";
2121 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
2122 line_table_test ltt (case_);
2123 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
2125 const location_t c11 = linemap_position_for_column (line_table, 11);
2126 const location_t c12 = linemap_position_for_column (line_table, 12);
2127 const location_t c13 = linemap_position_for_column (line_table, 13);
2128 const location_t c14 = linemap_position_for_column (line_table, 14);
2129 const location_t c21 = linemap_position_for_column (line_table, 21);
2131 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
2132 return;
2134 /* Use column 13 for the caret location, arbitrarily, to verify that we
2135 handle start != caret. */
2136 const location_t aaa = make_location (c13, c12, c14);
2137 ASSERT_EQ (c13, get_pure_location (aaa));
2138 ASSERT_EQ (c12, get_start (aaa));
2139 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
2140 ASSERT_EQ (c14, get_finish (aaa));
2141 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
2143 /* Make a location using a location with a range as the start-point. */
2144 const location_t not_aaa = make_location (c11, aaa, c14);
2145 ASSERT_EQ (c11, get_pure_location (not_aaa));
2146 /* It should use the start location of the range, not store the range
2147 itself. */
2148 ASSERT_EQ (c12, get_start (not_aaa));
2149 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
2150 ASSERT_EQ (c14, get_finish (not_aaa));
2151 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
2153 /* Similarly, make a location with a range as the end-point. */
2154 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
2155 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
2156 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
2157 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
2158 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
2159 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
2160 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
2161 /* It should use the finish location of the range, not store the range
2162 itself. */
2163 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
2164 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
2165 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
2166 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
2167 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
2170 /* Verify reading of input files (e.g. for caret-based diagnostics). */
2172 static void
2173 test_reading_source_line ()
2175 /* Create a tempfile and write some text to it. */
2176 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
2177 "01234567890123456789\n"
2178 "This is the test text\n"
2179 "This is the 3rd line");
2181 /* Read back a specific line from the tempfile. */
2182 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
2183 ASSERT_TRUE (source_line);
2184 ASSERT_TRUE (source_line.get_buffer () != NULL);
2185 ASSERT_EQ (20, source_line.length ());
2186 ASSERT_TRUE (!strncmp ("This is the 3rd line",
2187 source_line.get_buffer (), source_line.length ()));
2189 source_line = location_get_source_line (tmp.get_filename (), 2);
2190 ASSERT_TRUE (source_line);
2191 ASSERT_TRUE (source_line.get_buffer () != NULL);
2192 ASSERT_EQ (21, source_line.length ());
2193 ASSERT_TRUE (!strncmp ("This is the test text",
2194 source_line.get_buffer (), source_line.length ()));
2196 source_line = location_get_source_line (tmp.get_filename (), 4);
2197 ASSERT_FALSE (source_line);
2198 ASSERT_TRUE (source_line.get_buffer () == NULL);
2201 /* Tests of lexing. */
/* Assert that cpp_token_as_text for token TOK of PARSER yields a string
   equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *actual_txt                                           \
      = cpp_token_as_text ((PARSER), (TOK));                            \
    ASSERT_STREQ ((EXPECTED_TEXT),                                      \
                  reinterpret_cast <const char *> (actual_txt));        \
  SELFTEST_END_STMT
2212 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
2213 and ranges from EXP_START_COL to EXP_FINISH_COL.
2214 Use LOC as the effective location of the selftest. */
2216 static void
2217 assert_token_loc_eq (const location &loc,
2218 const cpp_token *tok,
2219 const char *exp_filename, int exp_linenum,
2220 int exp_start_col, int exp_finish_col)
2222 location_t tok_loc = tok->src_loc;
2223 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
2224 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
2226 /* If location_t values are sufficiently high, then column numbers
2227 will be unavailable. */
2228 if (!should_have_column_data_p (tok_loc))
2229 return;
2231 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2232 source_range tok_range = get_range_from_loc (line_table, tok_loc);
2233 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2234 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
/* Convenience wrapper for assert_token_loc_eq that checks TOK->src_loc
   and passes SELFTEST_LOCATION as the effective location of the
   selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,     \
                            EXP_START_COL, EXP_FINISH_COL)      \
  assert_token_loc_eq (SELFTEST_LOCATION,                       \
                       (TOK),                                   \
                       (EXP_FILENAME),                          \
                       (EXP_LINENUM),                           \
                       (EXP_START_COL),                         \
                       (EXP_FINISH_COL))
2245 /* Test of lexing a file using libcpp, verifying tokens and their
2246 location information. */
2248 static void
2249 test_lexer (const line_table_case &case_)
2251 /* Create a tempfile and write some text to it. */
2252 const char *content =
2253 /*00000000011111111112222222222333333.3333444444444.455555555556
2254 12345678901234567890123456789012345.6789012345678.901234567890. */
2255 ("test_name /* c-style comment */\n"
2256 " \"test literal\"\n"
2257 " // test c++-style comment\n"
2258 " 42\n");
2259 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2261 line_table_test ltt (case_);
2263 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2265 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2266 ASSERT_NE (fname, NULL);
2268 /* Verify that we get the expected tokens back, with the correct
2269 location information. */
2271 location_t loc;
2272 const cpp_token *tok;
2273 tok = cpp_get_token_with_location (parser, &loc);
2274 ASSERT_NE (tok, NULL);
2275 ASSERT_EQ (tok->type, CPP_NAME);
2276 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2277 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2279 tok = cpp_get_token_with_location (parser, &loc);
2280 ASSERT_NE (tok, NULL);
2281 ASSERT_EQ (tok->type, CPP_STRING);
2282 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2283 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2285 tok = cpp_get_token_with_location (parser, &loc);
2286 ASSERT_NE (tok, NULL);
2287 ASSERT_EQ (tok->type, CPP_NUMBER);
2288 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2289 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2291 tok = cpp_get_token_with_location (parser, &loc);
2292 ASSERT_NE (tok, NULL);
2293 ASSERT_EQ (tok->type, CPP_EOF);
2295 cpp_finish (parser, NULL);
2296 cpp_destroy (parser);
2299 /* Forward decls. */
2301 class lexer_test;
2302 class lexer_test_options;
2304 /* A class for specifying options of a lexer_test.
2305 The "apply" vfunc is called during the lexer_test constructor. */
2307 class lexer_test_options
2309 public:
2310 virtual void apply (lexer_test &) = 0;
2313 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2314 in its dtor.
2316 This is needed by struct lexer_test to ensure that the cleanup of the
2317 cpp_reader happens *after* the cleanup of the temp_source_file. */
2319 class cpp_reader_ptr
2321 public:
2322 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2324 ~cpp_reader_ptr ()
2326 cpp_finish (m_ptr, NULL);
2327 cpp_destroy (m_ptr);
2330 operator cpp_reader * () const { return m_ptr; }
2332 private:
2333 cpp_reader *m_ptr;
2336 /* A struct for writing lexer tests. */
2338 class lexer_test
2340 public:
2341 lexer_test (const line_table_case &case_, const char *content,
2342 lexer_test_options *options);
2343 ~lexer_test ();
2345 const cpp_token *get_token ();
2347 /* The ordering of these fields matters.
2348 The line_table_test must be first, since the cpp_reader_ptr
2349 uses it.
2350 The cpp_reader must be cleaned up *after* the temp_source_file
2351 since the filenames in input.cc's input cache are owned by the
2352 cpp_reader; in particular, when ~temp_source_file evicts the
2353 filename the filenames must still be alive. */
2354 line_table_test m_ltt;
2355 cpp_reader_ptr m_parser;
2356 temp_source_file m_tempfile;
2357 string_concat_db m_concats;
2358 bool m_implicitly_expect_EOF;
2361 /* Use an EBCDIC encoding for the execution charset, specifically
2362 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2364 This exercises iconv integration within libcpp.
2365 Not every build of iconv supports the given charset,
2366 so we need to flag this error and handle it gracefully. */
2368 class ebcdic_execution_charset : public lexer_test_options
2370 public:
2371 ebcdic_execution_charset () : m_num_iconv_errors (0)
2373 gcc_assert (s_singleton == NULL);
2374 s_singleton = this;
2376 ~ebcdic_execution_charset ()
2378 gcc_assert (s_singleton == this);
2379 s_singleton = NULL;
2382 void apply (lexer_test &test) final override
2384 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2385 cpp_opts->narrow_charset = "IBM1047";
2387 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2388 callbacks->diagnostic = on_diagnostic;
2391 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2392 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2393 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2394 rich_location *richloc ATTRIBUTE_UNUSED,
2395 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2396 ATTRIBUTE_FPTR_PRINTF(5,0)
2398 gcc_assert (s_singleton);
2399 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2400 const char *msg = "conversion from %s to %s not supported by iconv";
2401 #ifdef ENABLE_NLS
2402 msg = dgettext ("cpplib", msg);
2403 #endif
2404 /* Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc
2405 when the local iconv build doesn't support the conversion. */
2406 if (strcmp (msgid, msg) == 0)
2408 s_singleton->m_num_iconv_errors++;
2409 return true;
2412 /* Otherwise, we have an unexpected error. */
2413 abort ();
2416 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2418 private:
2419 static ebcdic_execution_charset *s_singleton;
2420 int m_num_iconv_errors;
2423 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2425 /* A lexer_test_options subclass that records a list of diagnostic
2426 messages emitted by the lexer. */
2428 class lexer_diagnostic_sink : public lexer_test_options
2430 public:
2431 lexer_diagnostic_sink ()
2433 gcc_assert (s_singleton == NULL);
2434 s_singleton = this;
2436 ~lexer_diagnostic_sink ()
2438 gcc_assert (s_singleton == this);
2439 s_singleton = NULL;
2441 int i;
2442 char *str;
2443 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2444 free (str);
2447 void apply (lexer_test &test) final override
2449 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2450 callbacks->diagnostic = on_diagnostic;
2453 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2454 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2455 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2456 rich_location *richloc ATTRIBUTE_UNUSED,
2457 const char *msgid, va_list *ap)
2458 ATTRIBUTE_FPTR_PRINTF(5,0)
2460 char *msg = xvasprintf (msgid, *ap);
2461 s_singleton->m_diagnostics.safe_push (msg);
2462 return true;
2465 auto_vec<char *> m_diagnostics;
2467 private:
2468 static lexer_diagnostic_sink *s_singleton;
2471 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2473 /* Constructor. Override line_table with a new instance based on CASE_,
2474 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2475 start parsing the tempfile. */
2477 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2478 lexer_test_options *options)
2479 : m_ltt (case_),
2480 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2481 /* Create a tempfile and write the text to it. */
2482 m_tempfile (SELFTEST_LOCATION, ".c", content),
2483 m_concats (),
2484 m_implicitly_expect_EOF (true)
2486 if (options)
2487 options->apply (*this);
2489 cpp_init_iconv (m_parser);
2491 /* Parse the file. */
2492 const char *fname = cpp_read_main_file (m_parser,
2493 m_tempfile.get_filename ());
2494 ASSERT_NE (fname, NULL);
2497 /* Destructor. By default, verify that the next token in m_parser is EOF. */
2499 lexer_test::~lexer_test ()
2501 location_t loc;
2502 const cpp_token *tok;
2504 if (m_implicitly_expect_EOF)
2506 tok = cpp_get_token_with_location (m_parser, &loc);
2507 ASSERT_NE (tok, NULL);
2508 ASSERT_EQ (tok->type, CPP_EOF);
2512 /* Get the next token from m_parser. */
2514 const cpp_token *
2515 lexer_test::get_token ()
2517 location_t loc;
2518 const cpp_token *tok;
2520 tok = cpp_get_token_with_location (m_parser, &loc);
2521 ASSERT_NE (tok, NULL);
2522 return tok;
2525 /* Verify that locations within string literals are correctly handled. */
2527 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2528 using the string concatenation database for TEST.
2530 Assert that the character at index IDX is on EXPECTED_LINE,
2531 and that it begins at column EXPECTED_START_COL and ends at
2532 EXPECTED_FINISH_COL (unless the locations are beyond
2533 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2534 columns). */
2536 static void
2537 assert_char_at_range (const location &loc,
2538 lexer_test& test,
2539 location_t strloc, enum cpp_ttype type, int idx,
2540 int expected_line, int expected_start_col,
2541 int expected_finish_col)
2543 cpp_reader *pfile = test.m_parser;
2544 string_concat_db *concats = &test.m_concats;
2546 source_range actual_range = source_range();
2547 const char *err
2548 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2549 &actual_range);
2550 if (should_have_column_data_p (strloc))
2551 ASSERT_EQ_AT (loc, NULL, err);
2552 else
2554 ASSERT_STREQ_AT (loc,
2555 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2556 err);
2557 return;
2560 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2561 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2562 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2563 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2565 if (should_have_column_data_p (actual_range.m_start))
2567 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2568 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2570 if (should_have_column_data_p (actual_range.m_finish))
2572 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2573 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
/* Convenience wrapper for assert_char_at_range that passes
   SELFTEST_LOCATION as the effective location of any errors.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX,     \
                             EXPECTED_LINE,                     \
                             EXPECTED_START_COL,                \
                             EXPECTED_FINISH_COL)               \
  assert_char_at_range (SELFTEST_LOCATION,                      \
                        (LEXER_TEST),                           \
                        (STRLOC),                               \
                        (TYPE),                                 \
                        (IDX),                                  \
                        (EXPECTED_LINE),                        \
                        (EXPECTED_START_COL),                   \
                        (EXPECTED_FINISH_COL))
2586 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2587 using the string concatenation database for TEST.
2589 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2591 static void
2592 assert_num_substring_ranges (const location &loc,
2593 lexer_test& test,
2594 location_t strloc,
2595 enum cpp_ttype type,
2596 int expected_num_ranges)
2598 cpp_reader *pfile = test.m_parser;
2599 string_concat_db *concats = &test.m_concats;
2601 int actual_num_ranges = -1;
2602 const char *err
2603 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2604 &actual_num_ranges);
2605 if (should_have_column_data_p (strloc))
2606 ASSERT_EQ_AT (loc, NULL, err);
2607 else
2609 ASSERT_STREQ_AT (loc,
2610 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2611 err);
2612 return;
2614 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
/* Convenience wrapper for assert_num_substring_ranges that passes
   SELFTEST_LOCATION as the effective location of any errors.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,   \
                                    EXPECTED_NUM_RANGES)        \
  assert_num_substring_ranges (SELFTEST_LOCATION,               \
                               (LEXER_TEST),                    \
                               (STRLOC),                        \
                               (TYPE),                          \
                               (EXPECTED_NUM_RANGES))
2626 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2627 returns an error (using the string concatenation database for TEST). */
2629 static void
2630 assert_has_no_substring_ranges (const location &loc,
2631 lexer_test& test,
2632 location_t strloc,
2633 enum cpp_ttype type,
2634 const char *expected_err)
2636 cpp_reader *pfile = test.m_parser;
2637 string_concat_db *concats = &test.m_concats;
2638 cpp_substring_ranges ranges;
2639 const char *actual_err
2640 = get_substring_ranges_for_loc (pfile, concats, strloc,
2641 type, ranges);
2642 if (should_have_column_data_p (strloc))
2643 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2644 else
2645 ASSERT_STREQ_AT (loc,
2646 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2647 actual_err);
/* Convenience wrapper for assert_has_no_substring_ranges that passes
   SELFTEST_LOCATION as the effective location of any errors.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)   \
  assert_has_no_substring_ranges (SELFTEST_LOCATION,                    \
                                  (LEXER_TEST),                         \
                                  (STRLOC),                             \
                                  (TYPE),                               \
                                  (ERR))
2654 /* Lex a simple string literal. Verify the substring location data, before
2655 and after running cpp_interpret_string on it. */
2657 static void
2658 test_lexer_string_locations_simple (const line_table_case &case_)
2660 /* Digits 0-9 (with 0 at column 10), the simple way.
2661 ....................000000000.11111111112.2222222223333333333
2662 ....................123456789.01234567890.1234567890123456789
2663 We add a trailing comment to ensure that we correctly locate
2664 the end of the string literal token. */
2665 const char *content = " \"0123456789\" /* not a string */\n";
2666 lexer_test test (case_, content, NULL);
2668 /* Verify that we get the expected token back, with the correct
2669 location information. */
2670 const cpp_token *tok = test.get_token ();
2671 ASSERT_EQ (tok->type, CPP_STRING);
2672 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2673 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2675 /* At this point in lexing, the quote characters are treated as part of
2676 the string (they are stripped off by cpp_interpret_string). */
2678 ASSERT_EQ (tok->val.str.len, 12);
2680 /* Verify that cpp_interpret_string works. */
2681 cpp_string dst_string;
2682 const enum cpp_ttype type = CPP_STRING;
2683 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2684 &dst_string, type);
2685 ASSERT_TRUE (result);
2686 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2687 free (const_cast <unsigned char *> (dst_string.text));
2689 /* Verify ranges of individual characters. This no longer includes the
2690 opening quote, but does include the closing quote. */
2691 for (int i = 0; i <= 10; i++)
2692 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2693 10 + i, 10 + i);
2695 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2698 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2699 encoding. */
2701 static void
2702 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2704 /* EBCDIC support requires iconv. */
2705 if (!HAVE_ICONV)
2706 return;
2708 /* Digits 0-9 (with 0 at column 10), the simple way.
2709 ....................000000000.11111111112.2222222223333333333
2710 ....................123456789.01234567890.1234567890123456789
2711 We add a trailing comment to ensure that we correctly locate
2712 the end of the string literal token. */
2713 const char *content = " \"0123456789\" /* not a string */\n";
2714 ebcdic_execution_charset use_ebcdic;
2715 lexer_test test (case_, content, &use_ebcdic);
2717 /* Verify that we get the expected token back, with the correct
2718 location information. */
2719 const cpp_token *tok = test.get_token ();
2720 ASSERT_EQ (tok->type, CPP_STRING);
2721 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2722 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2724 /* At this point in lexing, the quote characters are treated as part of
2725 the string (they are stripped off by cpp_interpret_string). */
2727 ASSERT_EQ (tok->val.str.len, 12);
2729 /* The remainder of the test requires an iconv implementation that
2730 can convert from UTF-8 to the EBCDIC encoding requested above. */
2731 if (use_ebcdic.iconv_errors_occurred_p ())
2732 return;
2734 /* Verify that cpp_interpret_string works. */
2735 cpp_string dst_string;
2736 const enum cpp_ttype type = CPP_STRING;
2737 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2738 &dst_string, type);
2739 ASSERT_TRUE (result);
2740 /* We should now have EBCDIC-encoded text, specifically
2741 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2742 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2743 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2744 (const char *)dst_string.text);
2745 free (const_cast <unsigned char *> (dst_string.text));
2747 /* Verify that we don't attempt to record substring location information
2748 for such cases. */
2749 ASSERT_HAS_NO_SUBSTRING_RANGES
2750 (test, tok->src_loc, type,
2751 "execution character set != source character set");
2754 /* Lex a string literal containing a hex-escaped character.
2755 Verify the substring location data, before and after running
2756 cpp_interpret_string on it. */
2758 static void
2759 test_lexer_string_locations_hex (const line_table_case &case_)
2761 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2762 and with a space in place of digit 6, to terminate the escaped
2763 hex code.
2764 ....................000000000.111111.11112222.
2765 ....................123456789.012345.67890123. */
2766 const char *content = " \"01234\\x35 789\"\n";
2767 lexer_test test (case_, content, NULL);
2769 /* Verify that we get the expected token back, with the correct
2770 location information. */
2771 const cpp_token *tok = test.get_token ();
2772 ASSERT_EQ (tok->type, CPP_STRING);
2773 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2774 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2776 /* At this point in lexing, the quote characters are treated as part of
2777 the string (they are stripped off by cpp_interpret_string). */
2778 ASSERT_EQ (tok->val.str.len, 15);
2780 /* Verify that cpp_interpret_string works. */
2781 cpp_string dst_string;
2782 const enum cpp_ttype type = CPP_STRING;
2783 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2784 &dst_string, type);
2785 ASSERT_TRUE (result);
2786 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2787 free (const_cast <unsigned char *> (dst_string.text));
2789 /* Verify ranges of individual characters. This no longer includes the
2790 opening quote, but does include the closing quote. */
2791 for (int i = 0; i <= 4; i++)
2792 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2793 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2794 for (int i = 6; i <= 10; i++)
2795 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2797 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2800 /* Lex a string literal containing an octal-escaped character.
2801 Verify the substring location data after running cpp_interpret_string
2802 on it. */
2804 static void
2805 test_lexer_string_locations_oct (const line_table_case &case_)
2807 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2808 and with a space in place of digit 6, to terminate the escaped
2809 octal code.
2810 ....................000000000.111111.11112222.2222223333333333444
2811 ....................123456789.012345.67890123.4567890123456789012 */
2812 const char *content = " \"01234\\065 789\" /* not a string */\n";
2813 lexer_test test (case_, content, NULL);
2815 /* Verify that we get the expected token back, with the correct
2816 location information. */
2817 const cpp_token *tok = test.get_token ();
2818 ASSERT_EQ (tok->type, CPP_STRING);
2819 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2821 /* Verify that cpp_interpret_string works. */
2822 cpp_string dst_string;
2823 const enum cpp_ttype type = CPP_STRING;
2824 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2825 &dst_string, type);
2826 ASSERT_TRUE (result);
2827 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2828 free (const_cast <unsigned char *> (dst_string.text));
2830 /* Verify ranges of individual characters. This no longer includes the
2831 opening quote, but does include the closing quote. */
2832 for (int i = 0; i < 5; i++)
2833 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2834 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2835 for (int i = 6; i <= 10; i++)
2836 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2838 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2841 /* Test of string literal containing letter escapes. */
2843 static void
2844 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2846 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2847 .....................000000000.1.11111.1.1.11222.22222223333333
2848 .....................123456789.0.12345.6.7.89012.34567890123456. */
2849 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2850 lexer_test test (case_, content, NULL);
2852 /* Verify that we get the expected tokens back. */
2853 const cpp_token *tok = test.get_token ();
2854 ASSERT_EQ (tok->type, CPP_STRING);
2855 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2857 /* Verify ranges of individual characters. */
2858 /* "\t". */
2859 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2860 0, 1, 10, 11);
2861 /* "foo". */
2862 for (int i = 1; i <= 3; i++)
2863 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2864 i, 1, 11 + i, 11 + i);
2865 /* "\\" and "\n". */
2866 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2867 4, 1, 15, 16);
2868 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2869 5, 1, 17, 18);
2871 /* "bar" and closing quote for nul-terminator. */
2872 for (int i = 6; i <= 9; i++)
2873 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2874 i, 1, 13 + i, 13 + i);
2876 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2879 /* Another test of a string literal containing a letter escape.
2880 Based on string seen in
2881 printf ("%-%\n");
2882 in gcc.dg/format/c90-printf-1.c. */
2884 static void
2885 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2887 /* .....................000000000.1111.11.1111.22222222223.
2888 .....................123456789.0123.45.6789.01234567890. */
2889 const char *content = (" \"%-%\\n\" /* non-str */\n");
2890 lexer_test test (case_, content, NULL);
2892 /* Verify that we get the expected tokens back. */
2893 const cpp_token *tok = test.get_token ();
2894 ASSERT_EQ (tok->type, CPP_STRING);
2895 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2897 /* Verify ranges of individual characters. */
2898 /* "%-%". */
2899 for (int i = 0; i < 3; i++)
2900 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2901 i, 1, 10 + i, 10 + i);
2902 /* "\n". */
2903 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2904 3, 1, 13, 14);
2906 /* Closing quote for nul-terminator. */
2907 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2908 4, 1, 15, 15);
2910 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2913 /* Lex a string literal containing UCN 4 characters.
2914 Verify the substring location data after running cpp_interpret_string
2915 on it. */
2917 static void
2918 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2920 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2921 as UCN 4.
2922 ....................000000000.111111.111122.222222223.33333333344444
2923 ....................123456789.012345.678901.234567890.12345678901234 */
2924 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2925 lexer_test test (case_, content, NULL);
2927 /* Verify that we get the expected token back, with the correct
2928 location information. */
2929 const cpp_token *tok = test.get_token ();
2930 ASSERT_EQ (tok->type, CPP_STRING);
2931 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2933 /* Verify that cpp_interpret_string works.
2934 The string should be encoded in the execution character
2935 set. Assuming that is UTF-8, we should have the following:
2936 ----------- ---- ----- ------- ----------------
2937 Byte offset Byte Octal Unicode Source Column(s)
2938 ----------- ---- ----- ------- ----------------
2939 0 0x30 '0' 10
2940 1 0x31 '1' 11
2941 2 0x32 '2' 12
2942 3 0x33 '3' 13
2943 4 0x34 '4' 14
2944 5 0xE2 \342 U+2174 15-20
2945 6 0x85 \205 (cont) 15-20
2946 7 0xB4 \264 (cont) 15-20
2947 8 0xE2 \342 U+2175 21-26
2948 9 0x85 \205 (cont) 21-26
2949 10 0xB5 \265 (cont) 21-26
2950 11 0x37 '7' 27
2951 12 0x38 '8' 28
2952 13 0x39 '9' 29
2953 14 0x00 30 (closing quote)
2954 ----------- ---- ----- ------- ---------------. */
2956 cpp_string dst_string;
2957 const enum cpp_ttype type = CPP_STRING;
2958 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2959 &dst_string, type);
2960 ASSERT_TRUE (result);
2961 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2962 (const char *)dst_string.text);
2963 free (const_cast <unsigned char *> (dst_string.text));
2965 /* Verify ranges of individual characters. This no longer includes the
2966 opening quote, but does include the closing quote.
2967 '01234'. */
2968 for (int i = 0; i <= 4; i++)
2969 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2970 /* U+2174. */
2971 for (int i = 5; i <= 7; i++)
2972 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2973 /* U+2175. */
2974 for (int i = 8; i <= 10; i++)
2975 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2976 /* '789' and nul terminator */
2977 for (int i = 11; i <= 14; i++)
2978 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2980 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2983 /* Lex a string literal containing UCN 8 characters.
2984 Verify the substring location data after running cpp_interpret_string
2985 on it. */
2987 static void
2988 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2990 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2991 ....................000000000.111111.1111222222.2222333333333.344444
2992 ....................123456789.012345.6789012345.6789012345678.901234 */
2993 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2994 lexer_test test (case_, content, NULL);
2996 /* Verify that we get the expected token back, with the correct
2997 location information. */
2998 const cpp_token *tok = test.get_token ();
2999 ASSERT_EQ (tok->type, CPP_STRING);
3000 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
3001 "\"01234\\U00002174\\U00002175789\"");
3003 /* Verify that cpp_interpret_string works.
3004 The UTF-8 encoding of the string is identical to that from
3005 the ucn4 testcase above; the only difference is the column
3006 locations. */
3007 cpp_string dst_string;
3008 const enum cpp_ttype type = CPP_STRING;
3009 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3010 &dst_string, type);
3011 ASSERT_TRUE (result);
3012 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3013 (const char *)dst_string.text);
3014 free (const_cast <unsigned char *> (dst_string.text));
3016 /* Verify ranges of individual characters. This no longer includes the
3017 opening quote, but does include the closing quote.
3018 '01234'. */
3019 for (int i = 0; i <= 4; i++)
3020 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3021 /* U+2174. */
3022 for (int i = 5; i <= 7; i++)
3023 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
3024 /* U+2175. */
3025 for (int i = 8; i <= 10; i++)
3026 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
3027 /* '789' at columns 35-37 */
3028 for (int i = 11; i <= 13; i++)
3029 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
3030 /* Closing quote/nul-terminator at column 38. */
3031 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
3033 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
/* Fetch a big-endian 32-bit value and convert to host endianness.
   PTR_BE_VALUE points at four bytes stored most-significant first; they
   are read one byte at a time, so the result does not depend on the
   byte order of the host.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *buf = (const unsigned char *)ptr_be_value;
  return (((uint32_t) buf[0] << 24)
          | ((uint32_t) buf[1] << 16)
          | ((uint32_t) buf[2] << 8)
          | (uint32_t) buf[3]);
}
3048 /* Lex a wide string literal and verify that attempts to read substring
3049 location data from it fail gracefully. */
3051 static void
3052 test_lexer_string_locations_wide_string (const line_table_case &case_)
3054 /* Digits 0-9.
3055 ....................000000000.11111111112.22222222233333
3056 ....................123456789.01234567890.12345678901234 */
3057 const char *content = " L\"0123456789\" /* non-str */\n";
3058 lexer_test test (case_, content, NULL);
3060 /* Verify that we get the expected token back, with the correct
3061 location information. */
3062 const cpp_token *tok = test.get_token ();
3063 ASSERT_EQ (tok->type, CPP_WSTRING);
3064 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
3066 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
3067 cpp_string dst_string;
3068 const enum cpp_ttype type = CPP_WSTRING;
3069 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3070 &dst_string, type);
3071 ASSERT_TRUE (result);
3072 /* The cpp_reader defaults to big-endian with
3073 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
3074 now be encoded as UTF-32BE. */
3075 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3076 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3077 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3078 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3079 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3080 free (const_cast <unsigned char *> (dst_string.text));
3082 /* We don't yet support generating substring location information
3083 for L"" strings. */
3084 ASSERT_HAS_NO_SUBSTRING_RANGES
3085 (test, tok->src_loc, type,
3086 "execution character set != source character set");
/* Fetch a big-endian 16-bit value and convert to host endianness.
   PTR_BE_VALUE points at two bytes stored most-significant first; they
   are read one byte at a time, so the result does not depend on the
   byte order of the host.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *buf = (const unsigned char *)ptr_be_value;
  return ((uint16_t) buf[0] << 8) | (uint16_t) buf[1];
}
3098 /* Lex a u"" string literal and verify that attempts to read substring
3099 location data from it fail gracefully. */
3101 static void
3102 test_lexer_string_locations_string16 (const line_table_case &case_)
3104 /* Digits 0-9.
3105 ....................000000000.11111111112.22222222233333
3106 ....................123456789.01234567890.12345678901234 */
3107 const char *content = " u\"0123456789\" /* non-str */\n";
3108 lexer_test test (case_, content, NULL);
3110 /* Verify that we get the expected token back, with the correct
3111 location information. */
3112 const cpp_token *tok = test.get_token ();
3113 ASSERT_EQ (tok->type, CPP_STRING16);
3114 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
3116 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
3117 cpp_string dst_string;
3118 const enum cpp_ttype type = CPP_STRING16;
3119 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3120 &dst_string, type);
3121 ASSERT_TRUE (result);
3123 /* The cpp_reader defaults to big-endian, so dst_string should
3124 now be encoded as UTF-16BE. */
3125 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
3126 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
3127 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
3128 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
3129 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
3130 free (const_cast <unsigned char *> (dst_string.text));
3132 /* We don't yet support generating substring location information
3133 for L"" strings. */
3134 ASSERT_HAS_NO_SUBSTRING_RANGES
3135 (test, tok->src_loc, type,
3136 "execution character set != source character set");
3139 /* Lex a U"" string literal and verify that attempts to read substring
3140 location data from it fail gracefully. */
3142 static void
3143 test_lexer_string_locations_string32 (const line_table_case &case_)
3145 /* Digits 0-9.
3146 ....................000000000.11111111112.22222222233333
3147 ....................123456789.01234567890.12345678901234 */
3148 const char *content = " U\"0123456789\" /* non-str */\n";
3149 lexer_test test (case_, content, NULL);
3151 /* Verify that we get the expected token back, with the correct
3152 location information. */
3153 const cpp_token *tok = test.get_token ();
3154 ASSERT_EQ (tok->type, CPP_STRING32);
3155 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
3157 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
3158 cpp_string dst_string;
3159 const enum cpp_ttype type = CPP_STRING32;
3160 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3161 &dst_string, type);
3162 ASSERT_TRUE (result);
3164 /* The cpp_reader defaults to big-endian, so dst_string should
3165 now be encoded as UTF-32BE. */
3166 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3167 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3168 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3169 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3170 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3171 free (const_cast <unsigned char *> (dst_string.text));
3173 /* We don't yet support generating substring location information
3174 for L"" strings. */
3175 ASSERT_HAS_NO_SUBSTRING_RANGES
3176 (test, tok->src_loc, type,
3177 "execution character set != source character set");
3180 /* Lex a u8-string literal.
3181 Verify the substring location data after running cpp_interpret_string
3182 on it. */
3184 static void
3185 test_lexer_string_locations_u8 (const line_table_case &case_)
3187 /* Digits 0-9.
3188 ....................000000000.11111111112.22222222233333
3189 ....................123456789.01234567890.12345678901234 */
3190 const char *content = " u8\"0123456789\" /* non-str */\n";
3191 lexer_test test (case_, content, NULL);
3193 /* Verify that we get the expected token back, with the correct
3194 location information. */
3195 const cpp_token *tok = test.get_token ();
3196 ASSERT_EQ (tok->type, CPP_UTF8STRING);
3197 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
3199 /* Verify that cpp_interpret_string works. */
3200 cpp_string dst_string;
3201 const enum cpp_ttype type = CPP_STRING;
3202 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3203 &dst_string, type);
3204 ASSERT_TRUE (result);
3205 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3206 free (const_cast <unsigned char *> (dst_string.text));
3208 /* Verify ranges of individual characters. This no longer includes the
3209 opening quote, but does include the closing quote. */
3210 for (int i = 0; i <= 10; i++)
3211 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3214 /* Lex a string literal containing UTF-8 source characters.
3215 Verify the substring location data after running cpp_interpret_string
3216 on it. */
3218 static void
3219 test_lexer_string_locations_utf8_source (const line_table_case &case_)
3221 /* This string literal is written out to the source file as UTF-8,
3222 and is of the form "before mojibake after", where "mojibake"
3223 is written as the following four unicode code points:
3224 U+6587 CJK UNIFIED IDEOGRAPH-6587
3225 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3226 U+5316 CJK UNIFIED IDEOGRAPH-5316
3227 U+3051 HIRAGANA LETTER KE.
3228 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3229 "before" and "after" are 1 byte per unicode character.
3231 The numbering shown are "columns", which are *byte* numbers within
3232 the line, rather than unicode character numbers.
3234 .................... 000000000.1111111.
3235 .................... 123456789.0123456. */
3236 const char *content = (" \"before "
3237 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3238 UTF-8: 0xE6 0x96 0x87
3239 C octal escaped UTF-8: \346\226\207
3240 "column" numbers: 17-19. */
3241 "\346\226\207"
3243 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3244 UTF-8: 0xE5 0xAD 0x97
3245 C octal escaped UTF-8: \345\255\227
3246 "column" numbers: 20-22. */
3247 "\345\255\227"
3249 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3250 UTF-8: 0xE5 0x8C 0x96
3251 C octal escaped UTF-8: \345\214\226
3252 "column" numbers: 23-25. */
3253 "\345\214\226"
3255 /* U+3051 HIRAGANA LETTER KE
3256 UTF-8: 0xE3 0x81 0x91
3257 C octal escaped UTF-8: \343\201\221
3258 "column" numbers: 26-28. */
3259 "\343\201\221"
3261 /* column numbers 29 onwards
3262 2333333.33334444444444
3263 9012345.67890123456789. */
3264 " after\" /* non-str */\n");
3265 lexer_test test (case_, content, NULL);
3267 /* Verify that we get the expected token back, with the correct
3268 location information. */
3269 const cpp_token *tok = test.get_token ();
3270 ASSERT_EQ (tok->type, CPP_STRING);
3271 ASSERT_TOKEN_AS_TEXT_EQ
3272 (test.m_parser, tok,
3273 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3275 /* Verify that cpp_interpret_string works. */
3276 cpp_string dst_string;
3277 const enum cpp_ttype type = CPP_STRING;
3278 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3279 &dst_string, type);
3280 ASSERT_TRUE (result);
3281 ASSERT_STREQ
3282 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3283 (const char *)dst_string.text);
3284 free (const_cast <unsigned char *> (dst_string.text));
3286 /* Verify ranges of individual characters. This no longer includes the
3287 opening quote, but does include the closing quote.
3288 Assuming that both source and execution encodings are UTF-8, we have
3289 a run of 25 octets in each, plus the NUL terminator. */
3290 for (int i = 0; i < 25; i++)
3291 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3292 /* NUL-terminator should use the closing quote at column 35. */
3293 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3295 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3298 /* Test of string literal concatenation. */
3300 static void
3301 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3303 /* Digits 0-9.
3304 .....................000000000.111111.11112222222222
3305 .....................123456789.012345.67890123456789. */
3306 const char *content = (" \"01234\" /* non-str */\n"
3307 " \"56789\" /* non-str */\n");
3308 lexer_test test (case_, content, NULL);
3310 location_t input_locs[2];
3312 /* Verify that we get the expected tokens back. */
3313 auto_vec <cpp_string> input_strings;
3314 const cpp_token *tok_a = test.get_token ();
3315 ASSERT_EQ (tok_a->type, CPP_STRING);
3316 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3317 input_strings.safe_push (tok_a->val.str);
3318 input_locs[0] = tok_a->src_loc;
3320 const cpp_token *tok_b = test.get_token ();
3321 ASSERT_EQ (tok_b->type, CPP_STRING);
3322 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3323 input_strings.safe_push (tok_b->val.str);
3324 input_locs[1] = tok_b->src_loc;
3326 /* Verify that cpp_interpret_string works. */
3327 cpp_string dst_string;
3328 const enum cpp_ttype type = CPP_STRING;
3329 bool result = cpp_interpret_string (test.m_parser,
3330 input_strings.address (), 2,
3331 &dst_string, type);
3332 ASSERT_TRUE (result);
3333 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3334 free (const_cast <unsigned char *> (dst_string.text));
3336 /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3337 test.m_concats.record_string_concatenation (2, input_locs);
3339 location_t initial_loc = input_locs[0];
3341 /* "01234" on line 1. */
3342 for (int i = 0; i <= 4; i++)
3343 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3344 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3345 for (int i = 5; i <= 10; i++)
3346 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3348 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3351 /* Another test of string literal concatenation. */
3353 static void
3354 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3356 /* Digits 0-9.
3357 .....................000000000.111.11111112222222
3358 .....................123456789.012.34567890123456. */
3359 const char *content = (" \"01\" /* non-str */\n"
3360 " \"23\" /* non-str */\n"
3361 " \"45\" /* non-str */\n"
3362 " \"67\" /* non-str */\n"
3363 " \"89\" /* non-str */\n");
3364 lexer_test test (case_, content, NULL);
3366 auto_vec <cpp_string> input_strings;
3367 location_t input_locs[5];
3369 /* Verify that we get the expected tokens back. */
3370 for (int i = 0; i < 5; i++)
3372 const cpp_token *tok = test.get_token ();
3373 ASSERT_EQ (tok->type, CPP_STRING);
3374 input_strings.safe_push (tok->val.str);
3375 input_locs[i] = tok->src_loc;
3378 /* Verify that cpp_interpret_string works. */
3379 cpp_string dst_string;
3380 const enum cpp_ttype type = CPP_STRING;
3381 bool result = cpp_interpret_string (test.m_parser,
3382 input_strings.address (), 5,
3383 &dst_string, type);
3384 ASSERT_TRUE (result);
3385 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3386 free (const_cast <unsigned char *> (dst_string.text));
3388 /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3389 test.m_concats.record_string_concatenation (5, input_locs);
3391 location_t initial_loc = input_locs[0];
3393 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3394 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3395 and expect get_source_range_for_substring to fail.
3396 However, for a string concatenation test, we can have a case
3397 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3398 but subsequent strings can be after it.
3399 Attempting to detect this within assert_char_at_range
3400 would overcomplicate the logic for the common test cases, so
3401 we detect it here. */
3402 if (should_have_column_data_p (input_locs[0])
3403 && !should_have_column_data_p (input_locs[4]))
3405 /* Verify that get_source_range_for_substring gracefully rejects
3406 this case. */
3407 source_range actual_range;
3408 const char *err
3409 = get_source_range_for_char (test.m_parser, &test.m_concats,
3410 initial_loc, type, 0, &actual_range);
3411 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3412 return;
3415 for (int i = 0; i < 5; i++)
3416 for (int j = 0; j < 2; j++)
3417 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3418 i + 1, 10 + j, 10 + j);
3420 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3421 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3423 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3426 /* Another test of string literal concatenation, this time combined with
3427 various kinds of escaped characters. */
3429 static void
3430 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3432 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3433 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3434 const char *content
3435 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3436 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3437 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3438 lexer_test test (case_, content, NULL);
3440 auto_vec <cpp_string> input_strings;
3441 location_t input_locs[4];
3443 /* Verify that we get the expected tokens back. */
3444 for (int i = 0; i < 4; i++)
3446 const cpp_token *tok = test.get_token ();
3447 ASSERT_EQ (tok->type, CPP_STRING);
3448 input_strings.safe_push (tok->val.str);
3449 input_locs[i] = tok->src_loc;
3452 /* Verify that cpp_interpret_string works. */
3453 cpp_string dst_string;
3454 const enum cpp_ttype type = CPP_STRING;
3455 bool result = cpp_interpret_string (test.m_parser,
3456 input_strings.address (), 4,
3457 &dst_string, type);
3458 ASSERT_TRUE (result);
3459 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3460 free (const_cast <unsigned char *> (dst_string.text));
3462 /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3463 test.m_concats.record_string_concatenation (4, input_locs);
3465 location_t initial_loc = input_locs[0];
3467 for (int i = 0; i <= 4; i++)
3468 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3469 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3470 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3471 for (int i = 7; i <= 9; i++)
3472 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3474 /* NUL-terminator should use the location of the final closing quote. */
3475 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3477 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3480 /* Test of string literal in a macro. */
3482 static void
3483 test_lexer_string_locations_macro (const line_table_case &case_)
3485 /* Digits 0-9.
3486 .....................0000000001111111111.22222222223.
3487 .....................1234567890123456789.01234567890. */
3488 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3489 " MACRO");
3490 lexer_test test (case_, content, NULL);
3492 /* Verify that we get the expected tokens back. */
3493 const cpp_token *tok = test.get_token ();
3494 ASSERT_EQ (tok->type, CPP_PADDING);
3496 tok = test.get_token ();
3497 ASSERT_EQ (tok->type, CPP_STRING);
3498 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3500 /* Verify ranges of individual characters. We ought to
3501 see columns within the macro definition. */
3502 for (int i = 0; i <= 10; i++)
3503 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3504 i, 1, 20 + i, 20 + i);
3506 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3508 tok = test.get_token ();
3509 ASSERT_EQ (tok->type, CPP_PADDING);
3512 /* Test of stringification of a macro argument. */
3514 static void
3515 test_lexer_string_locations_stringified_macro_argument
3516 (const line_table_case &case_)
3518 /* .....................000000000111111111122222222223.
3519 .....................123456789012345678901234567890. */
3520 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3521 "MACRO(foo)\n");
3522 lexer_test test (case_, content, NULL);
3524 /* Verify that we get the expected token back. */
3525 const cpp_token *tok = test.get_token ();
3526 ASSERT_EQ (tok->type, CPP_PADDING);
3528 tok = test.get_token ();
3529 ASSERT_EQ (tok->type, CPP_STRING);
3530 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3532 /* We don't support getting the location of a stringified macro
3533 argument. Verify that it fails gracefully. */
3534 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3535 "cpp_interpret_string_1 failed");
3537 tok = test.get_token ();
3538 ASSERT_EQ (tok->type, CPP_PADDING);
3540 tok = test.get_token ();
3541 ASSERT_EQ (tok->type, CPP_PADDING);
3544 /* Ensure that we are fail gracefully if something attempts to pass
3545 in a location that isn't a string literal token. Seen on this code:
3547 const char a[] = " %d ";
3548 __builtin_printf (a, 0.5);
3551 when c-format.cc erroneously used the indicated one-character
3552 location as the format string location, leading to a read past the
3553 end of a string buffer in cpp_interpret_string_1. */
3555 static void
3556 test_lexer_string_locations_non_string (const line_table_case &case_)
3558 /* .....................000000000111111111122222222223.
3559 .....................123456789012345678901234567890. */
3560 const char *content = (" a\n");
3561 lexer_test test (case_, content, NULL);
3563 /* Verify that we get the expected token back. */
3564 const cpp_token *tok = test.get_token ();
3565 ASSERT_EQ (tok->type, CPP_NAME);
3566 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3568 /* At this point, libcpp is attempting to interpret the name as a
3569 string literal, despite it not starting with a quote. We don't detect
3570 that, but we should at least fail gracefully. */
3571 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3572 "cpp_interpret_string_1 failed");
3575 /* Ensure that we can read substring information for a token which
3576 starts in one linemap and ends in another . Adapted from
3577 gcc.dg/cpp/pr69985.c. */
3579 static void
3580 test_lexer_string_locations_long_line (const line_table_case &case_)
3582 /* .....................000000.000111111111
3583 .....................123456.789012346789. */
3584 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3585 " \"0123456789012345678901234567890123456789"
3586 "0123456789012345678901234567890123456789"
3587 "0123456789012345678901234567890123456789"
3588 "0123456789\"\n");
3590 lexer_test test (case_, content, NULL);
3592 /* Verify that we get the expected token back. */
3593 const cpp_token *tok = test.get_token ();
3594 ASSERT_EQ (tok->type, CPP_STRING);
3596 if (!should_have_column_data_p (line_table->highest_location))
3597 return;
3599 /* Verify ranges of individual characters. */
3600 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3601 for (int i = 0; i < 131; i++)
3602 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3603 i, 2, 7 + i, 7 + i);
3606 /* Test of locations within a raw string that doesn't contain a newline. */
3608 static void
3609 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3611 /* .....................00.0000000111111111122.
3612 .....................12.3456789012345678901. */
3613 const char *content = ("R\"foo(0123456789)foo\"\n");
3614 lexer_test test (case_, content, NULL);
3616 /* Verify that we get the expected token back. */
3617 const cpp_token *tok = test.get_token ();
3618 ASSERT_EQ (tok->type, CPP_STRING);
3620 /* Verify that cpp_interpret_string works. */
3621 cpp_string dst_string;
3622 const enum cpp_ttype type = CPP_STRING;
3623 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3624 &dst_string, type);
3625 ASSERT_TRUE (result);
3626 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3627 free (const_cast <unsigned char *> (dst_string.text));
3629 if (!should_have_column_data_p (line_table->highest_location))
3630 return;
3632 /* 0-9, plus the nil terminator. */
3633 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3634 for (int i = 0; i < 11; i++)
3635 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3636 i, 1, 7 + i, 7 + i);
3639 /* Test of locations within a raw string that contains a newline. */
3641 static void
3642 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3644 /* .....................00.0000.
3645 .....................12.3456. */
3646 const char *content = ("R\"foo(\n"
3647 /* .....................00000.
3648 .....................12345. */
3649 "hello\n"
3650 "world\n"
3651 /* .....................00000.
3652 .....................12345. */
3653 ")foo\"\n");
3654 lexer_test test (case_, content, NULL);
3656 /* Verify that we get the expected token back. */
3657 const cpp_token *tok = test.get_token ();
3658 ASSERT_EQ (tok->type, CPP_STRING);
3660 /* Verify that cpp_interpret_string works. */
3661 cpp_string dst_string;
3662 const enum cpp_ttype type = CPP_STRING;
3663 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3664 &dst_string, type);
3665 ASSERT_TRUE (result);
3666 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3667 free (const_cast <unsigned char *> (dst_string.text));
3669 if (!should_have_column_data_p (line_table->highest_location))
3670 return;
3672 /* Currently we don't support locations within raw strings that
3673 contain newlines. */
3674 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3675 "range endpoints are on different lines");
3678 /* Test of parsing an unterminated raw string. */
3680 static void
3681 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3683 const char *content = "R\"ouch()ouCh\" /* etc */";
3685 lexer_diagnostic_sink diagnostics;
3686 lexer_test test (case_, content, &diagnostics);
3687 test.m_implicitly_expect_EOF = false;
3689 /* Attempt to parse the raw string. */
3690 const cpp_token *tok = test.get_token ();
3691 ASSERT_EQ (tok->type, CPP_EOF);
3693 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3694 /* We expect the message "unterminated raw string"
3695 in the "cpplib" translation domain.
3696 It's not clear that dgettext is available on all supported hosts,
3697 so this assertion is commented-out for now.
3698 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3699 diagnostics.m_diagnostics[0]);
3703 /* Test of lexing char constants. */
3705 static void
3706 test_lexer_char_constants (const line_table_case &case_)
3708 /* Various char constants.
3709 .....................0000000001111111111.22222222223.
3710 .....................1234567890123456789.01234567890. */
3711 const char *content = (" 'a'\n"
3712 " u'a'\n"
3713 " U'a'\n"
3714 " L'a'\n"
3715 " 'abc'\n");
3716 lexer_test test (case_, content, NULL);
3718 /* Verify that we get the expected tokens back. */
3719 /* 'a'. */
3720 const cpp_token *tok = test.get_token ();
3721 ASSERT_EQ (tok->type, CPP_CHAR);
3722 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3724 unsigned int chars_seen;
3725 int unsignedp;
3726 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3727 &chars_seen, &unsignedp);
3728 ASSERT_EQ (cc, 'a');
3729 ASSERT_EQ (chars_seen, 1);
3731 /* u'a'. */
3732 tok = test.get_token ();
3733 ASSERT_EQ (tok->type, CPP_CHAR16);
3734 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3736 /* U'a'. */
3737 tok = test.get_token ();
3738 ASSERT_EQ (tok->type, CPP_CHAR32);
3739 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3741 /* L'a'. */
3742 tok = test.get_token ();
3743 ASSERT_EQ (tok->type, CPP_WCHAR);
3744 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3746 /* 'abc' (c-char-sequence). */
3747 tok = test.get_token ();
3748 ASSERT_EQ (tok->type, CPP_CHAR);
3749 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3751 /* A table of interesting location_t values, giving one axis of our test
3752 matrix. */
3754 static const location_t boundary_locations[] = {
3755 /* Zero means "don't override the default values for a new line_table". */
3758 /* An arbitrary non-zero value that isn't close to one of
3759 the boundary values below. */
3760 0x10000,
3762 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3763 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3764 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3765 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3766 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3767 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3769 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3770 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3771 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3772 LINE_MAP_MAX_LOCATION_WITH_COLS,
3773 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3774 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3777 /* Run TESTCASE multiple times, once for each case in our test matrix. */
3779 void
3780 for_each_line_table_case (void (*testcase) (const line_table_case &))
3782 /* As noted above in the description of struct line_table_case,
3783 we want to explore a test matrix of interesting line_table
3784 situations, running various selftests for each case within the
3785 matrix. */
3787 /* Run all tests with:
3788 (a) line_table->default_range_bits == 0, and
3789 (b) line_table->default_range_bits == 5. */
3790 int num_cases_tested = 0;
3791 for (int default_range_bits = 0; default_range_bits <= 5;
3792 default_range_bits += 5)
3794 /* ...and use each of the "interesting" location values as
3795 the starting location within line_table. */
3796 const int num_boundary_locations = ARRAY_SIZE (boundary_locations);
3797 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3799 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3801 testcase (c);
3803 num_cases_tested++;
3807 /* Verify that we fully covered the test matrix. */
3808 ASSERT_EQ (num_cases_tested, 2 * 12);
3811 /* Verify that when presented with a consecutive pair of locations with
3812 a very large line offset, we don't attempt to consolidate them into
3813 a single ordinary linemap where the line offsets within the line map
3814 would lead to overflow (PR lto/88147). */
3816 static void
3817 test_line_offset_overflow ()
3819 line_table_test ltt (line_table_case (5, 0));
3821 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3822 linemap_line_start (line_table, 1, 100);
3823 location_t loc_a = linemap_line_start (line_table, 2578, 255);
3824 assert_loceq ("foo.c", 2578, 0, loc_a);
3826 const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3827 ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3828 ASSERT_EQ (ordmap_a->m_range_bits, 5);
3830 location_t loc_b = linemap_line_start (line_table, 404198, 512);
3831 assert_loceq ("foo.c", 404198, 0, loc_b);
3833 /* We should have started a new linemap, rather than attempting to store
3834 a very large line offset. */
3835 const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3836 ASSERT_NE (ordmap_a, ordmap_b);
3839 void test_cpp_utf8 ()
3841 const int def_tabstop = 8;
3842 cpp_char_column_policy policy (def_tabstop, cpp_wcwidth);
3844 /* Verify that wcwidth of invalid UTF-8 or control bytes is 1. */
3846 int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, policy);
3847 ASSERT_EQ (8, w_bad);
3848 int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, policy);
3849 ASSERT_EQ (5, w_ctrl);
3852 /* Verify that wcwidth of valid UTF-8 is as expected. */
3854 const int w_pi = cpp_display_width ("\xcf\x80", 2, policy);
3855 ASSERT_EQ (1, w_pi);
3856 const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, policy);
3857 ASSERT_EQ (2, w_emoji);
3858 const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
3859 policy);
3860 ASSERT_EQ (1, w_umlaut_precomposed);
3861 const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
3862 policy);
3863 ASSERT_EQ (1, w_umlaut_combining);
3864 const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, policy);
3865 ASSERT_EQ (2, w_han);
3866 const int w_ascii = cpp_display_width ("GCC", 3, policy);
3867 ASSERT_EQ (3, w_ascii);
3868 const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
3869 "\x9f! \xe4\xb8\xba y\xcc\x88",
3870 24, policy);
3871 ASSERT_EQ (18, w_mixed);
3874 /* Verify that display width properly expands tabs. */
3876 const char *tstr = "\tabc\td";
3877 ASSERT_EQ (6, cpp_display_width (tstr, 6,
3878 cpp_char_column_policy (1, cpp_wcwidth)));
3879 ASSERT_EQ (10, cpp_display_width (tstr, 6,
3880 cpp_char_column_policy (3, cpp_wcwidth)));
3881 ASSERT_EQ (17, cpp_display_width (tstr, 6,
3882 cpp_char_column_policy (8, cpp_wcwidth)));
3883 ASSERT_EQ (1,
3884 cpp_display_column_to_byte_column
3885 (tstr, 6, 7, cpp_char_column_policy (8, cpp_wcwidth)));
3888 /* Verify that cpp_byte_column_to_display_column can go past the end,
3889 and similar edge cases. */
3891 const char *str
3892 /* Display columns.
3893 111111112345 */
3894 = "\xcf\x80 abc";
3895 /* 111122223456
3896 Byte columns. */
3898 ASSERT_EQ (5, cpp_display_width (str, 6, policy));
3899 ASSERT_EQ (105,
3900 cpp_byte_column_to_display_column (str, 6, 106, policy));
3901 ASSERT_EQ (10000,
3902 cpp_byte_column_to_display_column (NULL, 0, 10000, policy));
3903 ASSERT_EQ (0,
3904 cpp_byte_column_to_display_column (NULL, 10000, 0, policy));
3907 /* Verify that cpp_display_column_to_byte_column can go past the end,
3908 and similar edge cases, and check invertibility. */
3910 const char *str
3911 /* Display columns.
3912 000000000000000000000000000000000000011
3913 111111112222222234444444455555555678901 */
3914 = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
3915 /* 000000000000000000000000000000000111111
3916 111122223333444456666777788889999012345
3917 Byte columns. */
3918 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, policy));
3919 ASSERT_EQ (15,
3920 cpp_display_column_to_byte_column (str, 15, 11, policy));
3921 ASSERT_EQ (115,
3922 cpp_display_column_to_byte_column (str, 15, 111, policy));
3923 ASSERT_EQ (10000,
3924 cpp_display_column_to_byte_column (NULL, 0, 10000, policy));
3925 ASSERT_EQ (0,
3926 cpp_display_column_to_byte_column (NULL, 10000, 0, policy));
3928 /* Verify that we do not interrupt a UTF-8 sequence. */
3929 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, policy));
3931 for (int byte_col = 1; byte_col <= 15; ++byte_col)
3933 const int disp_col
3934 = cpp_byte_column_to_display_column (str, 15, byte_col, policy);
3935 const int byte_col2
3936 = cpp_display_column_to_byte_column (str, 15, disp_col, policy);
3938 /* If we ask for the display column in the middle of a UTF-8
3939 sequence, it will return the length of the partial sequence,
3940 matching the behavior of GCC before display column support.
3941 Otherwise check the round trip was successful. */
3942 if (byte_col < 4)
3943 ASSERT_EQ (byte_col, disp_col);
3944 else if (byte_col >= 6 && byte_col < 9)
3945 ASSERT_EQ (3 + (byte_col - 5), disp_col);
3946 else
3947 ASSERT_EQ (byte_col2, byte_col);
3953 /* Run all of the selftests within this file. */
3955 void
3956 input_cc_tests ()
3958 test_linenum_comparisons ();
3959 test_should_have_column_data_p ();
3960 test_unknown_location ();
3961 test_builtins ();
3962 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3964 for_each_line_table_case (test_accessing_ordinary_linemaps);
3965 for_each_line_table_case (test_lexer);
3966 for_each_line_table_case (test_lexer_string_locations_simple);
3967 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3968 for_each_line_table_case (test_lexer_string_locations_hex);
3969 for_each_line_table_case (test_lexer_string_locations_oct);
3970 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3971 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3972 for_each_line_table_case (test_lexer_string_locations_ucn4);
3973 for_each_line_table_case (test_lexer_string_locations_ucn8);
3974 for_each_line_table_case (test_lexer_string_locations_wide_string);
3975 for_each_line_table_case (test_lexer_string_locations_string16);
3976 for_each_line_table_case (test_lexer_string_locations_string32);
3977 for_each_line_table_case (test_lexer_string_locations_u8);
3978 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3979 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3980 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3981 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3982 for_each_line_table_case (test_lexer_string_locations_macro);
3983 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3984 for_each_line_table_case (test_lexer_string_locations_non_string);
3985 for_each_line_table_case (test_lexer_string_locations_long_line);
3986 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3987 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3988 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3989 for_each_line_table_case (test_lexer_char_constants);
3991 test_reading_source_line ();
3993 test_line_offset_overflow ();
3995 test_cpp_utf8 ();
3998 } // namespace selftest
4000 #endif /* CHECKING_P */