Suppress -fstack-protector warning on hppa.
[official-gcc.git] / gcc / input.cc
blob18777a813b0ad78242efc8ba9915a2856cc5d0c9
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2022 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic.h"
25 #include "selftest.h"
26 #include "cpplib.h"
28 #ifndef HAVE_ICONV
29 #define HAVE_ICONV 0
30 #endif
32 const char *
33 special_fname_builtin ()
35 return _("<built-in>");
/* Input charset configuration.  */

/* Fallback charset callback: the file name argument is ignored and a
   null pointer is always returned.  */
static const char *
default_charset_callback (const char *)
{
  const char *no_charset = nullptr;
  return no_charset;
}
44 void
45 file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
46 bool should_skip_bom)
48 in_context.ccb = (ccb ? ccb : default_charset_callback);
49 in_context.should_skip_bom = should_skip_bom;
52 /* This is a cache used by get_next_line to store the content of a
53 file to be searched for file lines. */
54 class file_cache_slot
56 public:
57 file_cache_slot ();
58 ~file_cache_slot ();
60 bool read_line_num (size_t line_num,
61 char ** line, ssize_t *line_len);
63 /* Accessors. */
64 const char *get_file_path () const { return m_file_path; }
65 unsigned get_use_count () const { return m_use_count; }
66 bool missing_trailing_newline_p () const
68 return m_missing_trailing_newline;
71 void inc_use_count () { m_use_count++; }
73 bool create (const file_cache::input_context &in_context,
74 const char *file_path, FILE *fp, unsigned highest_use_count);
75 void evict ();
77 private:
78 /* These are information used to store a line boundary. */
79 class line_info
81 public:
82 /* The line number. It starts from 1. */
83 size_t line_num;
85 /* The position (byte count) of the beginning of the line,
86 relative to the file data pointer. This starts at zero. */
87 size_t start_pos;
89 /* The position (byte count) of the last byte of the line. This
90 normally points to the '\n' character, or to one byte after the
91 last byte of the file, if the file doesn't contain a '\n'
92 character. */
93 size_t end_pos;
95 line_info (size_t l, size_t s, size_t e)
96 : line_num (l), start_pos (s), end_pos (e)
99 line_info ()
100 :line_num (0), start_pos (0), end_pos (0)
104 bool needs_read_p () const;
105 bool needs_grow_p () const;
106 void maybe_grow ();
107 bool read_data ();
108 bool maybe_read_data ();
109 bool get_next_line (char **line, ssize_t *line_len);
110 bool read_next_line (char ** line, ssize_t *line_len);
111 bool goto_next_line ();
113 static const size_t buffer_size = 4 * 1024;
114 static const size_t line_record_size = 100;
116 /* The number of time this file has been accessed. This is used
117 to designate which file cache to evict from the cache
118 array. */
119 unsigned m_use_count;
121 /* The file_path is the key for identifying a particular file in
122 the cache.
123 For libcpp-using code, the underlying buffer for this field is
124 owned by the corresponding _cpp_file within the cpp_reader. */
125 const char *m_file_path;
127 FILE *m_fp;
129 /* This points to the content of the file that we've read so
130 far. */
131 char *m_data;
133 /* The allocated buffer to be freed may start a little earlier than DATA,
134 e.g. if a UTF8 BOM was skipped at the beginning. */
135 int m_alloc_offset;
137 /* The size of the DATA array above.*/
138 size_t m_size;
140 /* The number of bytes read from the underlying file so far. This
141 must be less (or equal) than SIZE above. */
142 size_t m_nb_read;
144 /* The index of the beginning of the current line. */
145 size_t m_line_start_idx;
147 /* The number of the previous line read. This starts at 1. Zero
148 means we've read no line so far. */
149 size_t m_line_num;
151 /* This is the total number of lines of the current file. At the
152 moment, we try to get this information from the line map
153 subsystem. Note that this is just a hint. When using the C++
154 front-end, this hint is correct because the input file is then
155 completely tokenized before parsing starts; so the line map knows
156 the number of lines before compilation really starts. For e.g,
157 the C front-end, it can happen that we start emitting diagnostics
158 before the line map has seen the end of the file. */
159 size_t m_total_lines;
161 /* Could this file be missing a trailing newline on its final line?
162 Initially true (to cope with empty files), set to true/false
163 as each line is read. */
164 bool m_missing_trailing_newline;
166 /* This is a record of the beginning and end of the lines we've seen
167 while reading the file. This is useful to avoid walking the data
168 from the beginning when we are asked to read a line that is
169 before LINE_START_IDX above. Note that the maximum size of this
170 record is line_record_size, so that the memory consumption
171 doesn't explode. We thus scale total_lines down to
172 line_record_size. */
173 vec<line_info, va_heap> m_line_record;
175 void offset_buffer (int offset)
177 gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
178 : (size_t) offset <= m_size);
179 gcc_assert (m_data);
180 m_alloc_offset += offset;
181 m_data += offset;
182 m_size -= offset;
187 /* Current position in real source file. */
189 location_t input_location = UNKNOWN_LOCATION;
191 class line_maps *line_table;
193 /* A stashed copy of "line_table" for use by selftest::line_table_test.
194 This needs to be a global so that it can be a GC root, and thus
195 prevent the stashed copy from being garbage-collected if the GC runs
196 during a line_table_test. */
198 class line_maps *saved_line_table;
200 /* Expand the source location LOC into a human readable location. If
201 LOC resolves to a builtin location, the file name of the readable
202 location is set to the string "<built-in>". If EXPANSION_POINT_P is
203 TRUE and LOC is virtual, then it is resolved to the expansion
204 point of the involved macro. Otherwise, it is resolved to the
205 spelling location of the token.
207 When resolving to the spelling location of the token, if the
208 resulting location is for a built-in location (that is, it has no
209 associated line/column) in the context of a macro expansion, the
210 returned location is the first one (while unwinding the macro
211 location towards its expansion point) that is in real source
212 code.
214 ASPECT controls which part of the location to use. */
216 static expanded_location
217 expand_location_1 (location_t loc,
218 bool expansion_point_p,
219 enum location_aspect aspect)
221 expanded_location xloc;
222 const line_map_ordinary *map;
223 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
224 tree block = NULL;
226 if (IS_ADHOC_LOC (loc))
228 block = LOCATION_BLOCK (loc);
229 loc = LOCATION_LOCUS (loc);
232 memset (&xloc, 0, sizeof (xloc));
234 if (loc >= RESERVED_LOCATION_COUNT)
236 if (!expansion_point_p)
238 /* We want to resolve LOC to its spelling location.
240 But if that spelling location is a reserved location that
241 appears in the context of a macro expansion (like for a
242 location for a built-in token), let's consider the first
243 location (toward the expansion point) that is not reserved;
244 that is, the first location that is in real source code. */
245 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
246 loc, NULL);
247 lrk = LRK_SPELLING_LOCATION;
249 loc = linemap_resolve_location (line_table, loc, lrk, &map);
251 /* loc is now either in an ordinary map, or is a reserved location.
252 If it is a compound location, the caret is in a spelling location,
253 but the start/finish might still be a virtual location.
254 Depending of what the caller asked for, we may need to recurse
255 one level in order to resolve any virtual locations in the
256 end-points. */
257 switch (aspect)
259 default:
260 gcc_unreachable ();
261 /* Fall through. */
262 case LOCATION_ASPECT_CARET:
263 break;
264 case LOCATION_ASPECT_START:
266 location_t start = get_start (loc);
267 if (start != loc)
268 return expand_location_1 (start, expansion_point_p, aspect);
270 break;
271 case LOCATION_ASPECT_FINISH:
273 location_t finish = get_finish (loc);
274 if (finish != loc)
275 return expand_location_1 (finish, expansion_point_p, aspect);
277 break;
279 xloc = linemap_expand_location (line_table, map, loc);
282 xloc.data = block;
283 if (loc <= BUILTINS_LOCATION)
284 xloc.file = loc == UNKNOWN_LOCATION ? NULL : special_fname_builtin ();
286 return xloc;
289 /* Initialize the set of cache used for files accessed by caret
290 diagnostic. */
292 static void
293 diagnostic_file_cache_init (void)
295 gcc_assert (global_dc);
296 if (global_dc->m_file_cache == NULL)
297 global_dc->m_file_cache = new file_cache ();
300 /* Free the resources used by the set of cache used for files accessed
301 by caret diagnostic. */
303 void
304 diagnostic_file_cache_fini (void)
306 if (global_dc->m_file_cache)
308 delete global_dc->m_file_cache;
309 global_dc->m_file_cache = NULL;
313 /* Return the total lines number that have been read so far by the
314 line map (in the preprocessor) so far. For languages like C++ that
315 entirely preprocess the input file before starting to parse, this
316 equals the actual number of lines of the file. */
318 static size_t
319 total_lines_num (const char *file_path)
321 size_t r = 0;
322 location_t l = 0;
323 if (linemap_get_file_highest_location (line_table, file_path, &l))
325 gcc_assert (l >= RESERVED_LOCATION_COUNT);
326 expanded_location xloc = expand_location (l);
327 r = xloc.line;
329 return r;
332 /* Lookup the cache used for the content of a given file accessed by
333 caret diagnostic. Return the found cached file, or NULL if no
334 cached file was found. */
336 file_cache_slot *
337 file_cache::lookup_file (const char *file_path)
339 gcc_assert (file_path);
341 /* This will contain the found cached file. */
342 file_cache_slot *r = NULL;
343 for (unsigned i = 0; i < num_file_slots; ++i)
345 file_cache_slot *c = &m_file_slots[i];
346 if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
348 c->inc_use_count ();
349 r = c;
353 if (r)
354 r->inc_use_count ();
356 return r;
359 /* Purge any mention of FILENAME from the cache of files used for
360 printing source code. For use in selftests when working
361 with tempfiles. */
363 void
364 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
366 gcc_assert (file_path);
368 if (!global_dc->m_file_cache)
369 return;
371 global_dc->m_file_cache->forcibly_evict_file (file_path);
374 void
375 file_cache::forcibly_evict_file (const char *file_path)
377 gcc_assert (file_path);
379 file_cache_slot *r = lookup_file (file_path);
380 if (!r)
381 /* Not found. */
382 return;
384 r->evict ();
387 void
388 file_cache_slot::evict ()
390 m_file_path = NULL;
391 if (m_fp)
392 fclose (m_fp);
393 m_fp = NULL;
394 m_nb_read = 0;
395 m_line_start_idx = 0;
396 m_line_num = 0;
397 m_line_record.truncate (0);
398 m_use_count = 0;
399 m_total_lines = 0;
400 m_missing_trailing_newline = true;
403 /* Return the file cache that has been less used, recently, or the
404 first empty one. If HIGHEST_USE_COUNT is non-null,
405 *HIGHEST_USE_COUNT is set to the highest use count of the entries
406 in the cache table. */
408 file_cache_slot*
409 file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
411 diagnostic_file_cache_init ();
413 file_cache_slot *to_evict = &m_file_slots[0];
414 unsigned huc = to_evict->get_use_count ();
415 for (unsigned i = 1; i < num_file_slots; ++i)
417 file_cache_slot *c = &m_file_slots[i];
418 bool c_is_empty = (c->get_file_path () == NULL);
420 if (c->get_use_count () < to_evict->get_use_count ()
421 || (to_evict->get_file_path () && c_is_empty))
422 /* We evict C because it's either an entry with a lower use
423 count or one that is empty. */
424 to_evict = c;
426 if (huc < c->get_use_count ())
427 huc = c->get_use_count ();
429 if (c_is_empty)
430 /* We've reached the end of the cache; subsequent elements are
431 all empty. */
432 break;
435 if (highest_use_count)
436 *highest_use_count = huc;
438 return to_evict;
441 /* Create the cache used for the content of a given file to be
442 accessed by caret diagnostic. This cache is added to an array of
443 cache and can be retrieved by lookup_file_in_cache_tab. This
444 function returns the created cache. Note that only the last
445 num_file_slots files are cached. */
447 file_cache_slot*
448 file_cache::add_file (const char *file_path)
451 FILE *fp = fopen (file_path, "r");
452 if (fp == NULL)
453 return NULL;
455 unsigned highest_use_count = 0;
456 file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
457 if (!r->create (in_context, file_path, fp, highest_use_count))
458 return NULL;
459 return r;
462 /* Populate this slot for use on FILE_PATH and FP, dropping any
463 existing cached content within it. */
465 bool
466 file_cache_slot::create (const file_cache::input_context &in_context,
467 const char *file_path, FILE *fp,
468 unsigned highest_use_count)
470 m_file_path = file_path;
471 if (m_fp)
472 fclose (m_fp);
473 m_fp = fp;
474 if (m_alloc_offset)
475 offset_buffer (-m_alloc_offset);
476 m_nb_read = 0;
477 m_line_start_idx = 0;
478 m_line_num = 0;
479 m_line_record.truncate (0);
480 /* Ensure that this cache entry doesn't get evicted next time
481 add_file_to_cache_tab is called. */
482 m_use_count = ++highest_use_count;
483 m_total_lines = total_lines_num (file_path);
484 m_missing_trailing_newline = true;
487 /* Check the input configuration to determine if we need to do any
488 transformations, such as charset conversion or BOM skipping. */
489 if (const char *input_charset = in_context.ccb (file_path))
491 /* Need a full-blown conversion of the input charset. */
492 fclose (m_fp);
493 m_fp = NULL;
494 const cpp_converted_source cs
495 = cpp_get_converted_source (file_path, input_charset);
496 if (!cs.data)
497 return false;
498 if (m_data)
499 XDELETEVEC (m_data);
500 m_data = cs.data;
501 m_nb_read = m_size = cs.len;
502 m_alloc_offset = cs.data - cs.to_free;
504 else if (in_context.should_skip_bom)
506 if (read_data ())
508 const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
509 offset_buffer (offset);
510 m_nb_read -= offset;
514 return true;
517 /* file_cache's ctor. */
519 file_cache::file_cache ()
520 : m_file_slots (new file_cache_slot[num_file_slots])
522 initialize_input_context (nullptr, false);
525 /* file_cache's dtor. */
527 file_cache::~file_cache ()
529 delete[] m_file_slots;
532 /* Lookup the cache used for the content of a given file accessed by
533 caret diagnostic. If no cached file was found, create a new cache
534 for this file, add it to the array of cached file and return
535 it. */
537 file_cache_slot*
538 file_cache::lookup_or_add_file (const char *file_path)
540 file_cache_slot *r = lookup_file (file_path);
541 if (r == NULL)
542 r = add_file (file_path);
543 return r;
546 /* Default constructor for a cache of file used by caret
547 diagnostic. */
549 file_cache_slot::file_cache_slot ()
550 : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
551 m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
552 m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true)
554 m_line_record.create (0);
557 /* Destructor for a cache of file used by caret diagnostic. */
559 file_cache_slot::~file_cache_slot ()
561 if (m_fp)
563 fclose (m_fp);
564 m_fp = NULL;
566 if (m_data)
568 offset_buffer (-m_alloc_offset);
569 XDELETEVEC (m_data);
570 m_data = 0;
572 m_line_record.release ();
575 /* Returns TRUE iff the cache would need to be filled with data coming
576 from the file. That is, either the cache is empty or full or the
577 current line is empty. Note that if the cache is full, it would
578 need to be extended and filled again. */
580 bool
581 file_cache_slot::needs_read_p () const
583 return m_fp && (m_nb_read == 0
584 || m_nb_read == m_size
585 || (m_line_start_idx >= m_nb_read - 1));
588 /* Return TRUE iff the cache is full and thus needs to be
589 extended. */
591 bool
592 file_cache_slot::needs_grow_p () const
594 return m_nb_read == m_size;
597 /* Grow the cache if it needs to be extended. */
599 void
600 file_cache_slot::maybe_grow ()
602 if (!needs_grow_p ())
603 return;
605 if (!m_data)
607 gcc_assert (m_size == 0 && m_alloc_offset == 0);
608 m_size = buffer_size;
609 m_data = XNEWVEC (char, m_size);
611 else
613 const int offset = m_alloc_offset;
614 offset_buffer (-offset);
615 m_size *= 2;
616 m_data = XRESIZEVEC (char, m_data, m_size);
617 offset_buffer (offset);
621 /* Read more data into the cache. Extends the cache if need be.
622 Returns TRUE iff new data could be read. */
624 bool
625 file_cache_slot::read_data ()
627 if (feof (m_fp) || ferror (m_fp))
628 return false;
630 maybe_grow ();
632 char * from = m_data + m_nb_read;
633 size_t to_read = m_size - m_nb_read;
634 size_t nb_read = fread (from, 1, to_read, m_fp);
636 if (ferror (m_fp))
637 return false;
639 m_nb_read += nb_read;
640 return !!nb_read;
643 /* Read new data iff the cache needs to be filled with more data
644 coming from the file FP. Return TRUE iff the cache was filled with
645 mode data. */
647 bool
648 file_cache_slot::maybe_read_data ()
650 if (!needs_read_p ())
651 return false;
652 return read_data ();
/* Helper function for file_cache_slot::get_next_line (), to find the end of
   the next line in the LEN bytes starting at S.  Returns with the memchr
   convention, i.e. nullptr if a line terminator was not found.  Line
   endings are determined in the same manner that libcpp does: any of
   \n, \r\n, or \r is a line ending.  For \r\n the returned pointer is
   that of the \n.  */

static char *
find_end_of_line (char *s, size_t len)
{
  char *const limit = s + len;
  for (char *p = s; p != limit; ++p)
    switch (*p)
      {
      case '\n':
        return p;

      case '\r':
        /* A \r in the very last position is not reported: it may be
           the end of the file, or merely the end of what has been
           read so far with the \n of a \r\n pair still to come.  The
           caller deals with a file ending in \r.  */
        if (p + 1 == limit)
          return nullptr;
        return p[1] == '\n' ? p + 1 : p;

      default:
        break;
      }

  return nullptr;
}
686 /* Read a new line from file FP, using C as a cache for the data
687 coming from the file. Upon successful completion, *LINE is set to
688 the beginning of the line found. *LINE points directly in the
689 line cache and is only valid until the next call of get_next_line.
690 *LINE_LEN is set to the length of the line. Note that the line
691 does not contain any terminal delimiter. This function returns
692 true if some data was read or process from the cache, false
693 otherwise. Note that subsequent calls to get_next_line might
694 make the content of *LINE invalid. */
696 bool
697 file_cache_slot::get_next_line (char **line, ssize_t *line_len)
699 /* Fill the cache with data to process. */
700 maybe_read_data ();
702 size_t remaining_size = m_nb_read - m_line_start_idx;
703 if (remaining_size == 0)
704 /* There is no more data to process. */
705 return false;
707 char *line_start = m_data + m_line_start_idx;
709 char *next_line_start = NULL;
710 size_t len = 0;
711 char *line_end = find_end_of_line (line_start, remaining_size);
712 if (line_end == NULL)
714 /* We haven't found an end-of-line delimiter in the cache.
715 Fill the cache with more data from the file and look again. */
716 while (maybe_read_data ())
718 line_start = m_data + m_line_start_idx;
719 remaining_size = m_nb_read - m_line_start_idx;
720 line_end = find_end_of_line (line_start, remaining_size);
721 if (line_end != NULL)
723 next_line_start = line_end + 1;
724 break;
727 if (line_end == NULL)
729 /* We've loaded all the file into the cache and still no
730 terminator. Let's say the line ends up at one byte past the
731 end of the file. This is to stay consistent with the case
732 of when the line ends up with a terminator and line_end points to
733 that. That consistency is useful below in the len calculation.
735 If the file ends in a \r, we didn't identify it as a line
736 terminator above, so do that now instead. */
737 line_end = m_data + m_nb_read;
738 if (m_nb_read && line_end[-1] == '\r')
740 --line_end;
741 m_missing_trailing_newline = false;
743 else
744 m_missing_trailing_newline = true;
746 else
747 m_missing_trailing_newline = false;
749 else
751 next_line_start = line_end + 1;
752 m_missing_trailing_newline = false;
755 if (m_fp && ferror (m_fp))
756 return false;
758 /* At this point, we've found the end of the of line. It either points to
759 the line terminator or to one byte after the last byte of the file. */
760 gcc_assert (line_end != NULL);
762 len = line_end - line_start;
764 if (m_line_start_idx < m_nb_read)
765 *line = line_start;
767 ++m_line_num;
769 /* Before we update our line record, make sure the hint about the
770 total number of lines of the file is correct. If it's not, then
771 we give up recording line boundaries from now on. */
772 bool update_line_record = true;
773 if (m_line_num > m_total_lines)
774 update_line_record = false;
776 /* Now update our line record so that re-reading lines from the
777 before m_line_start_idx is faster. */
778 if (update_line_record
779 && m_line_record.length () < line_record_size)
781 /* If the file lines fits in the line record, we just record all
782 its lines ...*/
783 if (m_total_lines <= line_record_size
784 && m_line_num > m_line_record.length ())
785 m_line_record.safe_push
786 (file_cache_slot::line_info (m_line_num,
787 m_line_start_idx,
788 line_end - m_data));
789 else if (m_total_lines > line_record_size)
791 /* ... otherwise, we just scale total_lines down to
792 (line_record_size lines. */
793 size_t n = (m_line_num * line_record_size) / m_total_lines;
794 if (m_line_record.length () == 0
795 || n >= m_line_record.length ())
796 m_line_record.safe_push
797 (file_cache_slot::line_info (m_line_num,
798 m_line_start_idx,
799 line_end - m_data));
803 /* Update m_line_start_idx so that it points to the next line to be
804 read. */
805 if (next_line_start)
806 m_line_start_idx = next_line_start - m_data;
807 else
808 /* We didn't find any terminal '\n'. Let's consider that the end
809 of line is the end of the data in the cache. The next
810 invocation of get_next_line will either read more data from the
811 underlying file or return false early because we've reached the
812 end of the file. */
813 m_line_start_idx = m_nb_read;
815 *line_len = len;
817 return true;
820 /* Consume the next bytes coming from the cache (or from its
821 underlying file if there are remaining unread bytes in the file)
822 until we reach the next end-of-line (or end-of-file). There is no
823 copying from the cache involved. Return TRUE upon successful
824 completion. */
826 bool
827 file_cache_slot::goto_next_line ()
829 char *l;
830 ssize_t len;
832 return get_next_line (&l, &len);
835 /* Read an arbitrary line number LINE_NUM from the file cached in C.
836 If the line was read successfully, *LINE points to the beginning
837 of the line in the file cache and *LINE_LEN is the length of the
838 line. *LINE is not nul-terminated, but may contain zero bytes.
839 *LINE is only valid until the next call of read_line_num.
840 This function returns bool if a line was read. */
842 bool
843 file_cache_slot::read_line_num (size_t line_num,
844 char ** line, ssize_t *line_len)
846 gcc_assert (line_num > 0);
848 if (line_num <= m_line_num)
850 /* We've been asked to read lines that are before m_line_num.
851 So lets use our line record (if it's not empty) to try to
852 avoid re-reading the file from the beginning again. */
854 if (m_line_record.is_empty ())
856 m_line_start_idx = 0;
857 m_line_num = 0;
859 else
861 file_cache_slot::line_info *i = NULL;
862 if (m_total_lines <= line_record_size)
864 /* In languages where the input file is not totally
865 preprocessed up front, the m_total_lines hint
866 can be smaller than the number of lines of the
867 file. In that case, only the first
868 m_total_lines have been recorded.
870 Otherwise, the first m_total_lines we've read have
871 their start/end recorded here. */
872 i = (line_num <= m_total_lines)
873 ? &m_line_record[line_num - 1]
874 : &m_line_record[m_total_lines - 1];
875 gcc_assert (i->line_num <= line_num);
877 else
879 /* So the file had more lines than our line record
880 size. Thus the number of lines we've recorded has
881 been scaled down to line_record_size. Let's
882 pick the start/end of the recorded line that is
883 closest to line_num. */
884 size_t n = (line_num <= m_total_lines)
885 ? line_num * line_record_size / m_total_lines
886 : m_line_record.length () - 1;
887 if (n < m_line_record.length ())
889 i = &m_line_record[n];
890 gcc_assert (i->line_num <= line_num);
894 if (i && i->line_num == line_num)
896 /* We have the start/end of the line. */
897 *line = m_data + i->start_pos;
898 *line_len = i->end_pos - i->start_pos;
899 return true;
902 if (i)
904 m_line_start_idx = i->start_pos;
905 m_line_num = i->line_num - 1;
907 else
909 m_line_start_idx = 0;
910 m_line_num = 0;
915 /* Let's walk from line m_line_num up to line_num - 1, without
916 copying any line. */
917 while (m_line_num < line_num - 1)
918 if (!goto_next_line ())
919 return false;
921 /* The line we want is the next one. Let's read and copy it back to
922 the caller. */
923 return get_next_line (line, line_len);
926 /* Return the physical source line that corresponds to FILE_PATH/LINE.
927 The line is not nul-terminated. The returned pointer is only
928 valid until the next call of location_get_source_line.
929 Note that the line can contain several null characters,
930 so the returned value's length has the actual length of the line.
931 If the function fails, a NULL char_span is returned. */
933 char_span
934 location_get_source_line (const char *file_path, int line)
936 char *buffer = NULL;
937 ssize_t len;
939 if (line == 0)
940 return char_span (NULL, 0);
942 if (file_path == NULL)
943 return char_span (NULL, 0);
945 diagnostic_file_cache_init ();
947 file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
948 if (c == NULL)
949 return char_span (NULL, 0);
951 bool read = c->read_line_num (line, &buffer, &len);
952 if (!read)
953 return char_span (NULL, 0);
955 return char_span (buffer, len);
958 /* Return a NUL-terminated copy of the source text between two locations, or
959 NULL if the arguments are invalid. The caller is responsible for freeing
960 the return value. */
962 char *
963 get_source_text_between (location_t start, location_t end)
965 expanded_location expstart =
966 expand_location_to_spelling_point (start, LOCATION_ASPECT_START);
967 expanded_location expend =
968 expand_location_to_spelling_point (end, LOCATION_ASPECT_FINISH);
970 /* If the locations are in different files or the end comes before the
971 start, give up and return nothing. */
972 if (!expstart.file || !expend.file)
973 return NULL;
974 if (strcmp (expstart.file, expend.file) != 0)
975 return NULL;
976 if (expstart.line > expend.line)
977 return NULL;
978 if (expstart.line == expend.line
979 && expstart.column > expend.column)
980 return NULL;
981 /* These aren't real column numbers, give up. */
982 if (expstart.column == 0 || expend.column == 0)
983 return NULL;
985 /* For a single line we need to trim both edges. */
986 if (expstart.line == expend.line)
988 char_span line = location_get_source_line (expstart.file, expstart.line);
989 if (line.length () < 1)
990 return NULL;
991 int s = expstart.column - 1;
992 int len = expend.column - s;
993 if (line.length () < (size_t)expend.column)
994 return NULL;
995 return line.subspan (s, len).xstrdup ();
998 struct obstack buf_obstack;
999 obstack_init (&buf_obstack);
1001 /* Loop through all lines in the range and append each to buf; may trim
1002 parts of the start and end lines off depending on column values. */
1003 for (int lnum = expstart.line; lnum <= expend.line; ++lnum)
1005 char_span line = location_get_source_line (expstart.file, lnum);
1006 if (line.length () < 1 && (lnum != expstart.line && lnum != expend.line))
1007 continue;
1009 /* For the first line in the range, only start at expstart.column */
1010 if (lnum == expstart.line)
1012 unsigned off = expstart.column - 1;
1013 if (line.length () < off)
1014 return NULL;
1015 line = line.subspan (off, line.length() - off);
1017 /* For the last line, don't go past expend.column */
1018 else if (lnum == expend.line)
1020 if (line.length () < (size_t)expend.column)
1021 return NULL;
1022 line = line.subspan (0, expend.column);
1025 /* Combine spaces at the beginning of later lines. */
1026 if (lnum > expstart.line)
1028 unsigned off;
1029 for (off = 0; off < line.length(); ++off)
1030 if (line[off] != ' ' && line[off] != '\t')
1031 break;
1032 if (off > 0)
1034 obstack_1grow (&buf_obstack, ' ');
1035 line = line.subspan (off, line.length() - off);
1039 /* This does not include any trailing newlines. */
1040 obstack_grow (&buf_obstack, line.get_buffer (), line.length ());
1043 /* NUL-terminate and finish the buf obstack. */
1044 obstack_1grow (&buf_obstack, 0);
1045 const char *buf = (const char *) obstack_finish (&buf_obstack);
1047 return xstrdup (buf);
1050 /* Determine if FILE_PATH missing a trailing newline on its final line.
1051 Only valid to call once all of the file has been loaded, by
1052 requesting a line number beyond the end of the file. */
1054 bool
1055 location_missing_trailing_newline (const char *file_path)
1057 diagnostic_file_cache_init ();
1059 file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
1060 if (c == NULL)
1061 return false;
1063 return c->missing_trailing_newline_p ();
1066 /* Test if the location originates from the spelling location of a
1067 builtin-tokens. That is, return TRUE if LOC is a (possibly
1068 virtual) location of a built-in token that appears in the expansion
1069 list of a macro. Please note that this function also works on
1070 tokens that result from built-in tokens. For instance, the
1071 function would return true if passed a token "4" that is the result
1072 of the expansion of the built-in __LINE__ macro. */
1073 bool
1074 is_location_from_builtin_token (location_t loc)
1076 const line_map_ordinary *map = NULL;
1077 loc = linemap_resolve_location (line_table, loc,
1078 LRK_SPELLING_LOCATION, &map);
1079 return loc == BUILTINS_LOCATION;
1082 /* Expand the source location LOC into a human readable location. If
1083 LOC is virtual, it resolves to the expansion point of the involved
1084 macro. If LOC resolves to a builtin location, the file name of the
1085 readable location is set to the string "<built-in>". */
1087 expanded_location
1088 expand_location (location_t loc)
1090 return expand_location_1 (loc, /*expansion_point_p=*/true,
1091 LOCATION_ASPECT_CARET);
1094 /* Expand the source location LOC into a human readable location. If
1095 LOC is virtual, it resolves to the expansion location of the
1096 relevant macro. If LOC resolves to a builtin location, the file
1097 name of the readable location is set to the string
1098 "<built-in>". */
1100 expanded_location
1101 expand_location_to_spelling_point (location_t loc,
1102 enum location_aspect aspect)
1104 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
1107 /* The rich_location class within libcpp requires a way to expand
1108 location_t instances, and relies on the client code
1109 providing a symbol named
1110 linemap_client_expand_location_to_spelling_point
1111 to do this.
1113 This is the implementation for libcommon.a (all host binaries),
1114 which simply calls into expand_location_1. */
1116 expanded_location
1117 linemap_client_expand_location_to_spelling_point (location_t loc,
1118 enum location_aspect aspect)
1120 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
1124 /* If LOCATION is in a system header and if it is a virtual location
1125 for a token coming from the expansion of a macro, unwind it to
1126 the location of the expansion point of the macro. If the expansion
1127 point is also in a system header return the original LOCATION.
1128 Otherwise, return the location of the expansion point.
1130 This is used for instance when we want to emit diagnostics about a
1131 token that may be located in a macro that is itself defined in a
1132 system header, for example, for the NULL macro. In such a case, if
1133 LOCATION were passed directly to diagnostic functions such as
1134 warning_at, the diagnostic would be suppressed (unless
1135 -Wsystem-headers). */
1137 location_t
1138 expansion_point_location_if_in_system_header (location_t location)
1140 if (!in_system_header_at (location))
1141 return location;
1143 location_t xloc = linemap_resolve_location (line_table, location,
1144 LRK_MACRO_EXPANSION_POINT,
1145 NULL);
1146 return in_system_header_at (xloc) ? location : xloc;
1149 /* If LOCATION is a virtual location for a token coming from the expansion
1150 of a macro, unwind to the location of the expansion point of the macro. */
1152 location_t
1153 expansion_point_location (location_t location)
1155 return linemap_resolve_location (line_table, location,
1156 LRK_MACRO_EXPANSION_POINT, NULL);
1159 /* Construct a location with caret at CARET, ranging from START to
1160 finish e.g.
1162 11111111112
1163 12345678901234567890
1165 523 return foo + bar;
1166 ~~~~^~~~~
1169 The location's caret is at the "+", line 523 column 15, but starts
1170 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
1171 of "bar" at column 19. */
1173 location_t
1174 make_location (location_t caret, location_t start, location_t finish)
1176 location_t pure_loc = get_pure_location (caret);
1177 source_range src_range;
1178 src_range.m_start = get_start (start);
1179 src_range.m_finish = get_finish (finish);
1180 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
1181 pure_loc,
1182 src_range,
1183 NULL,
1185 return combined_loc;
1188 /* Same as above, but taking a source range rather than two locations. */
1190 location_t
1191 make_location (location_t caret, source_range src_range)
1193 location_t pure_loc = get_pure_location (caret);
1194 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL, 0);
1197 /* An expanded_location stores the column in byte units. This function
1198 converts that column to display units. That requires reading the associated
1199 source line in order to calculate the display width. If that cannot be done
1200 for any reason, then returns the byte column as a fallback. */
1202 location_compute_display_column (expanded_location exploc,
1203 const cpp_char_column_policy &policy)
1205 if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
1206 return exploc.column;
1207 char_span line = location_get_source_line (exploc.file, exploc.line);
1208 /* If line is NULL, this function returns exploc.column which is the
1209 desired fallback. */
1210 return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
1211 exploc.column, policy);
1214 /* Dump statistics to stderr about the memory usage of the line_table
1215 set of line maps. This also displays some statistics about macro
1216 expansion. */
1218 void
1219 dump_line_table_statistics (void)
1221 struct linemap_stats s;
1222 long total_used_map_size,
1223 macro_maps_size,
1224 total_allocated_map_size;
1226 memset (&s, 0, sizeof (s));
1228 linemap_get_statistics (line_table, &s);
1230 macro_maps_size = s.macro_maps_used_size
1231 + s.macro_maps_locations_size;
1233 total_allocated_map_size = s.ordinary_maps_allocated_size
1234 + s.macro_maps_allocated_size
1235 + s.macro_maps_locations_size;
1237 total_used_map_size = s.ordinary_maps_used_size
1238 + s.macro_maps_used_size
1239 + s.macro_maps_locations_size;
1241 fprintf (stderr, "Number of expanded macros: %5ld\n",
1242 s.num_expanded_macros);
1243 if (s.num_expanded_macros != 0)
1244 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
1245 s.num_macro_tokens / s.num_expanded_macros);
1246 fprintf (stderr,
1247 "\nLine Table allocations during the "
1248 "compilation process\n");
1249 fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
1250 SIZE_AMOUNT (s.num_ordinary_maps_used));
1251 fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
1252 SIZE_AMOUNT (s.ordinary_maps_used_size));
1253 fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
1254 SIZE_AMOUNT (s.num_ordinary_maps_allocated));
1255 fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
1256 SIZE_AMOUNT (s.ordinary_maps_allocated_size));
1257 fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
1258 SIZE_AMOUNT (s.num_macro_maps_used));
1259 fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
1260 SIZE_AMOUNT (s.macro_maps_used_size));
1261 fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
1262 SIZE_AMOUNT (s.macro_maps_locations_size));
1263 fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
1264 SIZE_AMOUNT (macro_maps_size));
1265 fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
1266 SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
1267 fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
1268 SIZE_AMOUNT (total_allocated_map_size));
1269 fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
1270 SIZE_AMOUNT (total_used_map_size));
1271 fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
1272 SIZE_AMOUNT (s.adhoc_table_size));
1273 fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
1274 SIZE_AMOUNT (s.adhoc_table_entries_used));
1275 fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
1276 SIZE_AMOUNT (line_table->num_optimized_ranges));
1277 fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
1278 SIZE_AMOUNT (line_table->num_unoptimized_ranges));
1280 fprintf (stderr, "\n");
1283 /* Get location one beyond the final location in ordinary map IDX. */
1285 static location_t
1286 get_end_location (class line_maps *set, unsigned int idx)
1288 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1289 return set->highest_location;
1291 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1292 return MAP_START_LOCATION (next_map);
/* Helper function for write_digit_row.
   Write the decimal units digit of DIGIT (i.e. DIGIT % 10) to STREAM as a
   single character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1303 /* Helper function for dump_location_info.
1304 Write a row of numbers to STREAM, numbering a source line,
1305 giving the units, tens, hundreds etc of the column number. */
1307 static void
1308 write_digit_row (FILE *stream, int indent,
1309 const line_map_ordinary *map,
1310 location_t loc, int max_col, int divisor)
1312 fprintf (stream, "%*c", indent, ' ');
1313 fprintf (stream, "|");
1314 for (int column = 1; column < max_col; column++)
1316 location_t column_loc = loc + (column << map->m_range_bits);
1317 write_digit (stream, column_loc / divisor);
1319 fprintf (stream, "\n");
1322 /* Write a half-closed (START) / half-open (END) interval of
1323 location_t to STREAM. */
1325 static void
1326 dump_location_range (FILE *stream,
1327 location_t start, location_t end)
1329 fprintf (stream,
1330 " location_t interval: %u <= loc < %u\n",
1331 start, end);
1334 /* Write a labelled description of a half-closed (START) / half-open (END)
1335 interval of location_t to STREAM. */
1337 static void
1338 dump_labelled_location_range (FILE *stream,
1339 const char *name,
1340 location_t start, location_t end)
1342 fprintf (stream, "%s\n", name);
1343 dump_location_range (stream, start, end);
1344 fprintf (stream, "\n");
1347 /* Write a visualization of the locations in the line_table to STREAM. */
1349 void
1350 dump_location_info (FILE *stream)
1352 /* Visualize the reserved locations. */
1353 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1354 0, RESERVED_LOCATION_COUNT);
1356 /* Visualize the ordinary line_map instances, rendering the sources. */
1357 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1359 location_t end_location = get_end_location (line_table, idx);
1360 /* half-closed: doesn't include this one. */
1362 const line_map_ordinary *map
1363 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1364 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1365 dump_location_range (stream,
1366 MAP_START_LOCATION (map), end_location);
1367 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1368 fprintf (stream, " starting at line: %i\n",
1369 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1370 fprintf (stream, " column and range bits: %i\n",
1371 map->m_column_and_range_bits);
1372 fprintf (stream, " column bits: %i\n",
1373 map->m_column_and_range_bits - map->m_range_bits);
1374 fprintf (stream, " range bits: %i\n",
1375 map->m_range_bits);
1376 const char * reason;
1377 switch (map->reason) {
1378 case LC_ENTER:
1379 reason = "LC_ENTER";
1380 break;
1381 case LC_LEAVE:
1382 reason = "LC_LEAVE";
1383 break;
1384 case LC_RENAME:
1385 reason = "LC_RENAME";
1386 break;
1387 case LC_RENAME_VERBATIM:
1388 reason = "LC_RENAME_VERBATIM";
1389 break;
1390 case LC_ENTER_MACRO:
1391 reason = "LC_RENAME_MACRO";
1392 break;
1393 default:
1394 reason = "Unknown";
1396 fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
1398 const line_map_ordinary *includer_map
1399 = linemap_included_from_linemap (line_table, map);
1400 fprintf (stream, " included from location: %d",
1401 linemap_included_from (map));
1402 if (includer_map) {
1403 fprintf (stream, " (in ordinary map %d)",
1404 int (includer_map - line_table->info_ordinary.maps));
1406 fprintf (stream, "\n");
1408 /* Render the span of source lines that this "map" covers. */
1409 for (location_t loc = MAP_START_LOCATION (map);
1410 loc < end_location;
1411 loc += (1 << map->m_range_bits) )
1413 gcc_assert (pure_location_p (line_table, loc) );
1415 expanded_location exploc
1416 = linemap_expand_location (line_table, map, loc);
1418 if (exploc.column == 0)
1420 /* Beginning of a new source line: draw the line. */
1422 char_span line_text = location_get_source_line (exploc.file,
1423 exploc.line);
1424 if (!line_text)
1425 break;
1426 fprintf (stream,
1427 "%s:%3i|loc:%5i|%.*s\n",
1428 exploc.file, exploc.line,
1429 loc,
1430 (int)line_text.length (), line_text.get_buffer ());
1432 /* "loc" is at column 0, which means "the whole line".
1433 Render the locations *within* the line, by underlining
1434 it, showing the location_t numeric values
1435 at each column. */
1436 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1437 if (max_col > line_text.length ())
1438 max_col = line_text.length () + 1;
1440 int len_lnum = num_digits (exploc.line);
1441 if (len_lnum < 3)
1442 len_lnum = 3;
1443 int len_loc = num_digits (loc);
1444 if (len_loc < 5)
1445 len_loc = 5;
1447 int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1449 /* Thousands. */
1450 if (end_location > 999)
1451 write_digit_row (stream, indent, map, loc, max_col, 1000);
1453 /* Hundreds. */
1454 if (end_location > 99)
1455 write_digit_row (stream, indent, map, loc, max_col, 100);
1457 /* Tens. */
1458 write_digit_row (stream, indent, map, loc, max_col, 10);
1460 /* Units. */
1461 write_digit_row (stream, indent, map, loc, max_col, 1);
1464 fprintf (stream, "\n");
1467 /* Visualize unallocated values. */
1468 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1469 line_table->highest_location,
1470 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1472 /* Visualize the macro line_map instances, rendering the sources. */
1473 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1475 /* Each macro map that is allocated owns location_t values
1476 that are *lower* that the one before them.
1477 Hence it's meaningful to view them either in order of ascending
1478 source locations, or in order of ascending macro map index. */
1479 const bool ascending_location_ts = true;
1480 unsigned int idx = (ascending_location_ts
1481 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1482 : i);
1483 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1484 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1485 idx,
1486 linemap_map_get_macro_name (map),
1487 MACRO_MAP_NUM_MACRO_TOKENS (map));
1488 dump_location_range (stream,
1489 map->start_location,
1490 (map->start_location
1491 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1492 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1493 "expansion point is location %i",
1494 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1495 fprintf (stream, " map->start_location: %u\n",
1496 map->start_location);
1498 fprintf (stream, " macro_locations:\n");
1499 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1501 location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1502 location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1504 /* linemap_add_macro_token encodes token numbers in an expansion
1505 by putting them after MAP_START_LOCATION. */
1507 /* I'm typically seeing 4 uninitialized entries at the end of
1508 0xafafafaf.
1509 This appears to be due to macro.cc:replace_args
1510 adding 2 extra args for padding tokens; presumably there may
1511 be a leading and/or trailing padding token injected,
1512 each for 2 more location slots.
1513 This would explain there being up to 4 location_ts slots
1514 that may be uninitialized. */
1516 fprintf (stream, " %u: %u, %u\n",
1520 if (x == y)
1522 if (x < MAP_START_LOCATION (map))
1523 inform (x, "token %u has %<x-location == y-location == %u%>",
1524 i, x);
1525 else
1526 fprintf (stream,
1527 "x-location == y-location == %u encodes token # %u\n",
1528 x, x - MAP_START_LOCATION (map));
1530 else
1532 inform (x, "token %u has %<x-location == %u%>", i, x);
1533 inform (x, "token %u has %<y-location == %u%>", i, y);
1536 fprintf (stream, "\n");
1539 /* It appears that MAX_LOCATION_T itself is never assigned to a
1540 macro map, presumably due to an off-by-one error somewhere
1541 between the logic in linemap_enter_macro and
1542 LINEMAPS_MACRO_LOWEST_LOCATION. */
1543 dump_labelled_location_range (stream, "MAX_LOCATION_T",
1544 MAX_LOCATION_T,
1545 MAX_LOCATION_T + 1);
1547 /* Visualize ad-hoc values. */
1548 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1549 MAX_LOCATION_T + 1, UINT_MAX);
1552 /* string_concat's constructor. */
1554 string_concat::string_concat (int num, location_t *locs)
1555 : m_num (num)
1557 m_locs = ggc_vec_alloc <location_t> (num);
1558 for (int i = 0; i < num; i++)
1559 m_locs[i] = locs[i];
1562 /* string_concat_db's constructor. */
1564 string_concat_db::string_concat_db ()
1566 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1569 /* Record that a string concatenation occurred, covering NUM
1570 string literal tokens. LOCS is an array of size NUM, containing the
1571 locations of the tokens. A copy of LOCS is taken. */
1573 void
1574 string_concat_db::record_string_concatenation (int num, location_t *locs)
1576 gcc_assert (num > 1);
1577 gcc_assert (locs);
1579 location_t key_loc = get_key_loc (locs[0]);
1580 /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:
1581 any data now recorded under key 'key_loc' would be overwritten by a
1582 subsequent call with the same key 'key_loc'. */
1583 if (RESERVED_LOCATION_P (key_loc))
1584 return;
1586 string_concat *concat
1587 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1588 m_table->put (key_loc, concat);
1591 /* Determine if LOC was the location of the initial token of a
1592 concatenation of string literal tokens.
1593 If so, *OUT_NUM is written to with the number of tokens, and
1594 *OUT_LOCS with the location of an array of locations of the
1595 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1596 storage owned by the string_concat_db.
1597 Otherwise, return false. */
1599 bool
1600 string_concat_db::get_string_concatenation (location_t loc,
1601 int *out_num,
1602 location_t **out_locs)
1604 gcc_assert (out_num);
1605 gcc_assert (out_locs);
1607 location_t key_loc = get_key_loc (loc);
1608 /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see
1609 discussion in 'string_concat_db::record_string_concatenation'. */
1610 if (RESERVED_LOCATION_P (key_loc))
1611 return false;
1613 string_concat **concat = m_table->get (key_loc);
1614 if (!concat)
1615 return false;
1617 *out_num = (*concat)->m_num;
1618 *out_locs =(*concat)->m_locs;
1619 return true;
1622 /* Internal function. Canonicalize LOC into a form suitable for
1623 use as a key within the database, stripping away macro expansion,
1624 ad-hoc information, and range information, using the location of
1625 the start of LOC within an ordinary linemap. */
1627 location_t
1628 string_concat_db::get_key_loc (location_t loc)
1630 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1631 NULL);
1633 loc = get_range_from_loc (line_table, loc).m_start;
1635 return loc;
1638 /* Helper class for use within get_substring_ranges_for_loc.
1639 An vec of cpp_string with responsibility for releasing all of the
1640 str->text for each str in the vector. */
1642 class auto_cpp_string_vec : public auto_vec <cpp_string>
1644 public:
1645 auto_cpp_string_vec (int alloc)
1646 : auto_vec <cpp_string> (alloc) {}
1648 ~auto_cpp_string_vec ()
1650 /* Clean up the copies within this vec. */
1651 int i;
1652 cpp_string *str;
1653 FOR_EACH_VEC_ELT (*this, i, str)
1654 free (const_cast <unsigned char *> (str->text));
1658 /* Attempt to populate RANGES with source location information on the
1659 individual characters within the string literal found at STRLOC.
1660 If CONCATS is non-NULL, then any string literals that the token at
1661 STRLOC was concatenated with are also added to RANGES.
1663 Return NULL if successful, or an error message if any errors occurred (in
1664 which case RANGES may be only partially populated and should not
1665 be used).
1667 This is implemented by re-parsing the relevant source line(s). */
1669 static const char *
1670 get_substring_ranges_for_loc (cpp_reader *pfile,
1671 string_concat_db *concats,
1672 location_t strloc,
1673 enum cpp_ttype type,
1674 cpp_substring_ranges &ranges)
1676 gcc_assert (pfile);
1678 if (strloc == UNKNOWN_LOCATION)
1679 return "unknown location";
1681 /* Reparsing the strings requires accurate location information.
1682 If -ftrack-macro-expansion has been overridden from its default
1683 of 2, then we might have a location of a macro expansion point,
1684 rather than the location of the literal itself.
1685 Avoid this by requiring that we have full macro expansion tracking
1686 for substring locations to be available. */
1687 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1688 return "track_macro_expansion != 2";
1690 /* If #line or # 44 "file"-style directives are present, then there's
1691 no guarantee that the line numbers we have can be used to locate
1692 the strings. For example, we might have a .i file with # directives
1693 pointing back to lines within a .c file, but the .c file might
1694 have been edited since the .i file was created.
1695 In such a case, the safest course is to disable on-demand substring
1696 locations. */
1697 if (line_table->seen_line_directive)
1698 return "seen line directive";
1700 /* If string concatenation has occurred at STRLOC, get the locations
1701 of all of the literal tokens making up the compound string.
1702 Otherwise, just use STRLOC. */
1703 int num_locs = 1;
1704 location_t *strlocs = &strloc;
1705 if (concats)
1706 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1708 auto_cpp_string_vec strs (num_locs);
1709 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1710 for (int i = 0; i < num_locs; i++)
1712 /* Get range of strloc. We will use it to locate the start and finish
1713 of the literal token within the line. */
1714 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1716 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1718 /* If the string token was within a macro expansion, then we can
1719 cope with it for the simple case where we have a single token.
1720 Otherwise, bail out. */
1721 if (src_range.m_start != src_range.m_finish)
1722 return "macro expansion";
1724 else
1726 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1727 /* If so, we can't reliably determine where the token started within
1728 its line. */
1729 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1731 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1732 /* If so, we can't reliably determine where the token finished
1733 within its line. */
1734 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1737 expanded_location start
1738 = expand_location_to_spelling_point (src_range.m_start,
1739 LOCATION_ASPECT_START);
1740 expanded_location finish
1741 = expand_location_to_spelling_point (src_range.m_finish,
1742 LOCATION_ASPECT_FINISH);
1743 if (start.file != finish.file)
1744 return "range endpoints are in different files";
1745 if (start.line != finish.line)
1746 return "range endpoints are on different lines";
1747 if (start.column > finish.column)
1748 return "range endpoints are reversed";
1750 char_span line = location_get_source_line (start.file, start.line);
1751 if (!line)
1752 return "unable to read source line";
1754 /* Determine the location of the literal (including quotes
1755 and leading prefix chars, such as the 'u' in a u""
1756 token). */
1757 size_t literal_length = finish.column - start.column + 1;
1759 /* Ensure that we don't crash if we got the wrong location. */
1760 if (start.column < 1)
1761 return "zero start column";
1762 if (line.length () < (start.column - 1 + literal_length))
1763 return "line is not wide enough";
1765 char_span literal = line.subspan (start.column - 1, literal_length);
1767 cpp_string from;
1768 from.len = literal_length;
1769 /* Make a copy of the literal, to avoid having to rely on
1770 the lifetime of the copy of the line within the cache.
1771 This will be released by the auto_cpp_string_vec dtor. */
1772 from.text = (unsigned char *)literal.xstrdup ();
1773 strs.safe_push (from);
1775 /* For very long lines, a new linemap could have started
1776 halfway through the token.
1777 Ensure that the loc_reader uses the linemap of the
1778 *end* of the token for its start location. */
1779 const line_map_ordinary *start_ord_map;
1780 linemap_resolve_location (line_table, src_range.m_start,
1781 LRK_SPELLING_LOCATION, &start_ord_map);
1782 const line_map_ordinary *final_ord_map;
1783 linemap_resolve_location (line_table, src_range.m_finish,
1784 LRK_SPELLING_LOCATION, &final_ord_map);
1785 if (start_ord_map == NULL || final_ord_map == NULL)
1786 return "failed to get ordinary maps";
1787 /* Bulletproofing. We ought to only have different ordinary maps
1788 for start vs finish due to line-length jumps. */
1789 if (start_ord_map != final_ord_map
1790 && start_ord_map->to_file != final_ord_map->to_file)
1791 return "start and finish are spelled in different ordinary maps";
1792 /* The file from linemap_resolve_location ought to match that from
1793 expand_location_to_spelling_point. */
1794 if (start_ord_map->to_file != start.file)
1795 return "mismatching file after resolving linemap";
1797 location_t start_loc
1798 = linemap_position_for_line_and_column (line_table, final_ord_map,
1799 start.line, start.column);
1801 cpp_string_location_reader loc_reader (start_loc, line_table);
1802 loc_readers.safe_push (loc_reader);
1805 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1806 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1807 loc_readers.address (),
1808 num_locs, &ranges, type);
1809 if (err)
1810 return err;
1812 /* Success: "ranges" should now contain information on the string. */
1813 return NULL;
1816 /* Attempt to populate *OUT_LOC with source location information on the
1817 given characters within the string literal found at STRLOC.
1818 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1819 character set.
1821 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1822 and string literal "012345\n789"
1823 *OUT_LOC is written to with:
1824 "012345\n789"
1825 ~^~~~~
1827 If CONCATS is non-NULL, then any string literals that the token at
1828 STRLOC was concatenated with are also considered.
1830 This is implemented by re-parsing the relevant source line(s).
1832 Return NULL if successful, or an error message if any errors occurred.
1833 Error messages are intended for GCC developers (to help debugging) rather
1834 than for end-users. */
1836 const char *
1837 get_location_within_string (cpp_reader *pfile,
1838 string_concat_db *concats,
1839 location_t strloc,
1840 enum cpp_ttype type,
1841 int caret_idx, int start_idx, int end_idx,
1842 location_t *out_loc)
1844 gcc_checking_assert (caret_idx >= 0);
1845 gcc_checking_assert (start_idx >= 0);
1846 gcc_checking_assert (end_idx >= 0);
1847 gcc_assert (out_loc);
1849 cpp_substring_ranges ranges;
1850 const char *err
1851 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1852 if (err)
1853 return err;
1855 if (caret_idx >= ranges.get_num_ranges ())
1856 return "caret_idx out of range";
1857 if (start_idx >= ranges.get_num_ranges ())
1858 return "start_idx out of range";
1859 if (end_idx >= ranges.get_num_ranges ())
1860 return "end_idx out of range";
1862 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1863 ranges.get_range (start_idx).m_start,
1864 ranges.get_range (end_idx).m_finish);
1865 return NULL;
1868 /* Associate the DISCRIMINATOR with LOCUS, and return a new locus. */
1870 location_t
1871 location_with_discriminator (location_t locus, int discriminator)
1873 tree block = LOCATION_BLOCK (locus);
1874 source_range src_range = get_range_from_loc (line_table, locus);
1875 locus = get_pure_location (locus);
1877 if (locus == UNKNOWN_LOCATION)
1878 return locus;
1880 return COMBINE_LOCATION_DATA (line_table, locus, src_range, block, discriminator);
1883 /* Return TRUE if LOCUS represents a location with a discriminator. */
1885 bool
1886 has_discriminator (location_t locus)
1888 return get_discriminator_from_loc (locus) != 0;
1891 /* Return the discriminator for LOCUS. */
1894 get_discriminator_from_loc (location_t locus)
1896 return get_discriminator_from_loc (line_table, locus);
1899 #if CHECKING_P
1901 namespace selftest {
1903 /* Selftests of location handling. */
1905 /* Attempt to populate *OUT_RANGE with source location information on the
1906 given character within the string literal found at STRLOC.
1907 CHAR_IDX refers to an offset within the execution character set.
1908 If CONCATS is non-NULL, then any string literals that the token at
1909 STRLOC was concatenated with are also considered.
1911 This is implemented by re-parsing the relevant source line(s).
1913 Return NULL if successful, or an error message if any errors occurred.
1914 Error messages are intended for GCC developers (to help debugging) rather
1915 than for end-users. */
1917 static const char *
1918 get_source_range_for_char (cpp_reader *pfile,
1919 string_concat_db *concats,
1920 location_t strloc,
1921 enum cpp_ttype type,
1922 int char_idx,
1923 source_range *out_range)
1925 gcc_checking_assert (char_idx >= 0);
1926 gcc_assert (out_range);
1928 cpp_substring_ranges ranges;
1929 const char *err
1930 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1931 if (err)
1932 return err;
1934 if (char_idx >= ranges.get_num_ranges ())
1935 return "char_idx out of range";
1937 *out_range = ranges.get_range (char_idx);
1938 return NULL;
1941 /* As get_source_range_for_char, but write to *OUT the number
1942 of ranges that are available. */
1944 static const char *
1945 get_num_source_ranges_for_substring (cpp_reader *pfile,
1946 string_concat_db *concats,
1947 location_t strloc,
1948 enum cpp_ttype type,
1949 int *out)
1951 gcc_assert (out);
1953 cpp_substring_ranges ranges;
1954 const char *err
1955 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1957 if (err)
1958 return err;
1960 *out = ranges.get_num_ranges ();
1961 return NULL;
1964 /* Selftests of location handling. */
1966 /* Verify that compare() on linenum_type handles comparisons over the full
1967 range of the type. */
1969 static void
1970 test_linenum_comparisons ()
1972 linenum_type min_line (0);
1973 linenum_type max_line (0xffffffff);
1974 ASSERT_EQ (0, compare (min_line, min_line));
1975 ASSERT_EQ (0, compare (max_line, max_line));
1977 ASSERT_GT (compare (max_line, min_line), 0);
1978 ASSERT_LT (compare (min_line, max_line), 0);
1981 /* Helper function for verifying location data: when location_t
1982 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1983 as having column 0. */
1985 static bool
1986 should_have_column_data_p (location_t loc)
1988 if (IS_ADHOC_LOC (loc))
1989 loc = get_location_from_adhoc_loc (line_table, loc);
1990 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1991 return false;
1992 return true;
1995 /* Selftest for should_have_column_data_p. */
1997 static void
1998 test_should_have_column_data_p ()
2000 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
2001 ASSERT_TRUE
2002 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
2003 ASSERT_FALSE
2004 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
2007 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
2008 on LOC. */
2010 static void
2011 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
2012 location_t loc)
2014 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
2015 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
2016 /* If location_t values are sufficiently high, then column numbers
2017 will be unavailable and LOCATION_COLUMN (loc) will be 0.
2018 When close to the threshold, column numbers *may* be present: if
2019 the final linemap before the threshold contains a line that straddles
2020 the threshold, locations in that line have column information. */
2021 if (should_have_column_data_p (loc))
2022 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.cc: there are various threshold
     values for location_t beyond which line-map.cc changes
     behavior (disabling of the range-packing optimization, disabling
     of column-tracking).  We can exercise these by starting the
     line_table at interesting values at or near these thresholds.

   The following class describes one particular case within our test
   matrix.  */

class line_table_case
{
 public:
  /* Describe a case that uses DEFAULT_RANGE_BITS and starts the line
     table at BASE_LOCATION.  */
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* Value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the value at which the line table is started.  */
  int m_base_location;
};
2053 /* Constructor. Store the old value of line_table, and create a new
2054 one, using sane defaults. */
2056 line_table_test::line_table_test ()
2058 gcc_assert (saved_line_table == NULL);
2059 saved_line_table = line_table;
2060 line_table = ggc_alloc<line_maps> ();
2061 linemap_init (line_table, BUILTINS_LOCATION);
2062 gcc_assert (saved_line_table->reallocator);
2063 line_table->reallocator = saved_line_table->reallocator;
2064 gcc_assert (saved_line_table->round_alloc_size);
2065 line_table->round_alloc_size = saved_line_table->round_alloc_size;
2066 line_table->default_range_bits = 0;
2069 /* Constructor. Store the old value of line_table, and create a new
2070 one, using the sitation described in CASE_. */
2072 line_table_test::line_table_test (const line_table_case &case_)
2074 gcc_assert (saved_line_table == NULL);
2075 saved_line_table = line_table;
2076 line_table = ggc_alloc<line_maps> ();
2077 linemap_init (line_table, BUILTINS_LOCATION);
2078 gcc_assert (saved_line_table->reallocator);
2079 line_table->reallocator = saved_line_table->reallocator;
2080 gcc_assert (saved_line_table->round_alloc_size);
2081 line_table->round_alloc_size = saved_line_table->round_alloc_size;
2082 line_table->default_range_bits = case_.m_default_range_bits;
2083 if (case_.m_base_location)
2085 line_table->highest_location = case_.m_base_location;
2086 line_table->highest_line = case_.m_base_location;
2090 /* Destructor. Restore the old value of line_table. */
2092 line_table_test::~line_table_test ()
2094 gcc_assert (saved_line_table != NULL);
2095 line_table = saved_line_table;
2096 saved_line_table = NULL;
2099 /* Verify basic operation of ordinary linemaps. */
2101 static void
2102 test_accessing_ordinary_linemaps (const line_table_case &case_)
2104 line_table_test ltt (case_);
2106 /* Build a simple linemap describing some locations. */
2107 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
2109 linemap_line_start (line_table, 1, 100);
2110 location_t loc_a = linemap_position_for_column (line_table, 1);
2111 location_t loc_b = linemap_position_for_column (line_table, 23);
2113 linemap_line_start (line_table, 2, 100);
2114 location_t loc_c = linemap_position_for_column (line_table, 1);
2115 location_t loc_d = linemap_position_for_column (line_table, 17);
2117 /* Example of a very long line. */
2118 linemap_line_start (line_table, 3, 2000);
2119 location_t loc_e = linemap_position_for_column (line_table, 700);
2121 /* Transitioning back to a short line. */
2122 linemap_line_start (line_table, 4, 0);
2123 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
2125 if (should_have_column_data_p (loc_back_to_short))
2127 /* Verify that we switched to short lines in the linemap. */
2128 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
2129 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
2132 /* Example of a line that will eventually be seen to be longer
2133 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
2134 below that. */
2135 linemap_line_start (line_table, 5, 2000);
2137 location_t loc_start_of_very_long_line
2138 = linemap_position_for_column (line_table, 2000);
2139 location_t loc_too_wide
2140 = linemap_position_for_column (line_table, 4097);
2141 location_t loc_too_wide_2
2142 = linemap_position_for_column (line_table, 4098);
2144 /* ...and back to a sane line length. */
2145 linemap_line_start (line_table, 6, 100);
2146 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
2148 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
2150 /* Multiple files. */
2151 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
2152 linemap_line_start (line_table, 1, 200);
2153 location_t loc_f = linemap_position_for_column (line_table, 150);
2154 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
2156 /* Verify that we can recover the location info. */
2157 assert_loceq ("foo.c", 1, 1, loc_a);
2158 assert_loceq ("foo.c", 1, 23, loc_b);
2159 assert_loceq ("foo.c", 2, 1, loc_c);
2160 assert_loceq ("foo.c", 2, 17, loc_d);
2161 assert_loceq ("foo.c", 3, 700, loc_e);
2162 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
2164 /* In the very wide line, the initial location should be fully tracked. */
2165 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
2166 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
2167 be disabled. */
2168 assert_loceq ("foo.c", 5, 0, loc_too_wide);
2169 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
2170 /*...and column-tracking should be re-enabled for subsequent lines. */
2171 assert_loceq ("foo.c", 6, 10, loc_sane_again);
2173 assert_loceq ("bar.c", 1, 150, loc_f);
2175 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
2176 ASSERT_TRUE (pure_location_p (line_table, loc_a));
2178 /* Verify using make_location to build a range, and extracting data
2179 back from it. */
2180 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
2181 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
2182 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
2183 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
2184 ASSERT_EQ (loc_b, src_range.m_start);
2185 ASSERT_EQ (loc_d, src_range.m_finish);
2188 /* Verify various properties of UNKNOWN_LOCATION. */
2190 static void
2191 test_unknown_location ()
2193 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
2194 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
2195 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
2198 /* Verify various properties of BUILTINS_LOCATION. */
2200 static void
2201 test_builtins ()
2203 assert_loceq (special_fname_builtin (), 0, 0, BUILTINS_LOCATION);
2204 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
2207 /* Regression test for make_location.
2208 Ensure that we use pure locations for the start/finish of the range,
2209 rather than storing a packed or ad-hoc range as the start/finish. */
2211 static void
2212 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
2214 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
2215 with C++ frontend.
2216 ....................0000000001111111111222.
2217 ....................1234567890123456789012. */
2218 const char *content = " r += !aaa == bbb;\n";
2219 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
2220 line_table_test ltt (case_);
2221 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
2223 const location_t c11 = linemap_position_for_column (line_table, 11);
2224 const location_t c12 = linemap_position_for_column (line_table, 12);
2225 const location_t c13 = linemap_position_for_column (line_table, 13);
2226 const location_t c14 = linemap_position_for_column (line_table, 14);
2227 const location_t c21 = linemap_position_for_column (line_table, 21);
2229 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
2230 return;
2232 /* Use column 13 for the caret location, arbitrarily, to verify that we
2233 handle start != caret. */
2234 const location_t aaa = make_location (c13, c12, c14);
2235 ASSERT_EQ (c13, get_pure_location (aaa));
2236 ASSERT_EQ (c12, get_start (aaa));
2237 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
2238 ASSERT_EQ (c14, get_finish (aaa));
2239 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
2241 /* Make a location using a location with a range as the start-point. */
2242 const location_t not_aaa = make_location (c11, aaa, c14);
2243 ASSERT_EQ (c11, get_pure_location (not_aaa));
2244 /* It should use the start location of the range, not store the range
2245 itself. */
2246 ASSERT_EQ (c12, get_start (not_aaa));
2247 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
2248 ASSERT_EQ (c14, get_finish (not_aaa));
2249 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
2251 /* Similarly, make a location with a range as the end-point. */
2252 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
2253 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
2254 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
2255 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
2256 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
2257 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
2258 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
2259 /* It should use the finish location of the range, not store the range
2260 itself. */
2261 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
2262 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
2263 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
2264 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
2265 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
2268 /* Verify reading of input files (e.g. for caret-based diagnostics). */
2270 static void
2271 test_reading_source_line ()
2273 /* Create a tempfile and write some text to it. */
2274 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
2275 "01234567890123456789\n"
2276 "This is the test text\n"
2277 "This is the 3rd line");
2279 /* Read back a specific line from the tempfile. */
2280 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
2281 ASSERT_TRUE (source_line);
2282 ASSERT_TRUE (source_line.get_buffer () != NULL);
2283 ASSERT_EQ (20, source_line.length ());
2284 ASSERT_TRUE (!strncmp ("This is the 3rd line",
2285 source_line.get_buffer (), source_line.length ()));
2287 source_line = location_get_source_line (tmp.get_filename (), 2);
2288 ASSERT_TRUE (source_line);
2289 ASSERT_TRUE (source_line.get_buffer () != NULL);
2290 ASSERT_EQ (21, source_line.length ());
2291 ASSERT_TRUE (!strncmp ("This is the test text",
2292 source_line.get_buffer (), source_line.length ()));
2294 source_line = location_get_source_line (tmp.get_filename (), 4);
2295 ASSERT_FALSE (source_line);
2296 ASSERT_TRUE (source_line.get_buffer () == NULL);
/* Tests of lexing.  */

/* Assert that cpp_token_as_text for token TOK of PARSER yields
   EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *tok_spelling_ = cpp_token_as_text ((PARSER), (TOK)); \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)tok_spelling_);        \
  SELFTEST_END_STMT
2310 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
2311 and ranges from EXP_START_COL to EXP_FINISH_COL.
2312 Use LOC as the effective location of the selftest. */
2314 static void
2315 assert_token_loc_eq (const location &loc,
2316 const cpp_token *tok,
2317 const char *exp_filename, int exp_linenum,
2318 int exp_start_col, int exp_finish_col)
2320 location_t tok_loc = tok->src_loc;
2321 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
2322 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
2324 /* If location_t values are sufficiently high, then column numbers
2325 will be unavailable. */
2326 if (!should_have_column_data_p (tok_loc))
2327 return;
2329 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2330 source_range tok_range = get_range_from_loc (line_table, tok_loc);
2331 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2332 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
/* Check TOK->src_loc with assert_token_loc_eq, reporting any failure
   at the point where this macro is used (SELFTEST_LOCATION).  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,     \
                            EXP_START_COL, EXP_FINISH_COL)      \
  assert_token_loc_eq (SELFTEST_LOCATION,                       \
                       (TOK),                                   \
                       (EXP_FILENAME),                          \
                       (EXP_LINENUM),                           \
                       (EXP_START_COL),                         \
                       (EXP_FINISH_COL))
2343 /* Test of lexing a file using libcpp, verifying tokens and their
2344 location information. */
2346 static void
2347 test_lexer (const line_table_case &case_)
2349 /* Create a tempfile and write some text to it. */
2350 const char *content =
2351 /*00000000011111111112222222222333333.3333444444444.455555555556
2352 12345678901234567890123456789012345.6789012345678.901234567890. */
2353 ("test_name /* c-style comment */\n"
2354 " \"test literal\"\n"
2355 " // test c++-style comment\n"
2356 " 42\n");
2357 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2359 line_table_test ltt (case_);
2361 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2363 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2364 ASSERT_NE (fname, NULL);
2366 /* Verify that we get the expected tokens back, with the correct
2367 location information. */
2369 location_t loc;
2370 const cpp_token *tok;
2371 tok = cpp_get_token_with_location (parser, &loc);
2372 ASSERT_NE (tok, NULL);
2373 ASSERT_EQ (tok->type, CPP_NAME);
2374 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2375 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2377 tok = cpp_get_token_with_location (parser, &loc);
2378 ASSERT_NE (tok, NULL);
2379 ASSERT_EQ (tok->type, CPP_STRING);
2380 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2381 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2383 tok = cpp_get_token_with_location (parser, &loc);
2384 ASSERT_NE (tok, NULL);
2385 ASSERT_EQ (tok->type, CPP_NUMBER);
2386 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2387 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2389 tok = cpp_get_token_with_location (parser, &loc);
2390 ASSERT_NE (tok, NULL);
2391 ASSERT_EQ (tok->type, CPP_EOF);
2393 cpp_finish (parser, NULL);
2394 cpp_destroy (parser);
/* Forward decls.  */

class lexer_test;
class lexer_test_options;

/* A class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor.  */

class lexer_test_options
{
 public:
  /* Polymorphic base: make destruction through a base pointer
     well-defined.  */
  virtual ~lexer_test_options () {}

  /* Hook for subclasses to configure TEST.  */
  virtual void apply (lexer_test &) = 0;
};
2411 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2412 in its dtor.
2414 This is needed by struct lexer_test to ensure that the cleanup of the
2415 cpp_reader happens *after* the cleanup of the temp_source_file. */
2417 class cpp_reader_ptr
2419 public:
2420 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2422 ~cpp_reader_ptr ()
2424 cpp_finish (m_ptr, NULL);
2425 cpp_destroy (m_ptr);
2428 operator cpp_reader * () const { return m_ptr; }
2430 private:
2431 cpp_reader *m_ptr;
2434 /* A struct for writing lexer tests. */
2436 class lexer_test
2438 public:
2439 lexer_test (const line_table_case &case_, const char *content,
2440 lexer_test_options *options);
2441 ~lexer_test ();
2443 const cpp_token *get_token ();
2445 /* The ordering of these fields matters.
2446 The line_table_test must be first, since the cpp_reader_ptr
2447 uses it.
2448 The cpp_reader must be cleaned up *after* the temp_source_file
2449 since the filenames in input.cc's input cache are owned by the
2450 cpp_reader; in particular, when ~temp_source_file evicts the
2451 filename the filenames must still be alive. */
2452 line_table_test m_ltt;
2453 cpp_reader_ptr m_parser;
2454 temp_source_file m_tempfile;
2455 string_concat_db m_concats;
2456 bool m_implicitly_expect_EOF;
2459 /* Use an EBCDIC encoding for the execution charset, specifically
2460 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2462 This exercises iconv integration within libcpp.
2463 Not every build of iconv supports the given charset,
2464 so we need to flag this error and handle it gracefully. */
2466 class ebcdic_execution_charset : public lexer_test_options
2468 public:
2469 ebcdic_execution_charset () : m_num_iconv_errors (0)
2471 gcc_assert (s_singleton == NULL);
2472 s_singleton = this;
2474 ~ebcdic_execution_charset ()
2476 gcc_assert (s_singleton == this);
2477 s_singleton = NULL;
2480 void apply (lexer_test &test) final override
2482 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2483 cpp_opts->narrow_charset = "IBM1047";
2485 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2486 callbacks->diagnostic = on_diagnostic;
2489 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2490 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2491 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2492 rich_location *richloc ATTRIBUTE_UNUSED,
2493 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2494 ATTRIBUTE_FPTR_PRINTF(5,0)
2496 gcc_assert (s_singleton);
2497 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2498 const char *msg = "conversion from %s to %s not supported by iconv";
2499 #ifdef ENABLE_NLS
2500 msg = dgettext ("cpplib", msg);
2501 #endif
2502 /* Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc
2503 when the local iconv build doesn't support the conversion. */
2504 if (strcmp (msgid, msg) == 0)
2506 s_singleton->m_num_iconv_errors++;
2507 return true;
2510 /* Otherwise, we have an unexpected error. */
2511 abort ();
2514 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2516 private:
2517 static ebcdic_execution_charset *s_singleton;
2518 int m_num_iconv_errors;
2521 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2523 /* A lexer_test_options subclass that records a list of diagnostic
2524 messages emitted by the lexer. */
2526 class lexer_diagnostic_sink : public lexer_test_options
2528 public:
2529 lexer_diagnostic_sink ()
2531 gcc_assert (s_singleton == NULL);
2532 s_singleton = this;
2534 ~lexer_diagnostic_sink ()
2536 gcc_assert (s_singleton == this);
2537 s_singleton = NULL;
2539 int i;
2540 char *str;
2541 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2542 free (str);
2545 void apply (lexer_test &test) final override
2547 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2548 callbacks->diagnostic = on_diagnostic;
2551 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2552 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2553 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2554 rich_location *richloc ATTRIBUTE_UNUSED,
2555 const char *msgid, va_list *ap)
2556 ATTRIBUTE_FPTR_PRINTF(5,0)
2558 char *msg = xvasprintf (msgid, *ap);
2559 s_singleton->m_diagnostics.safe_push (msg);
2560 return true;
2563 auto_vec<char *> m_diagnostics;
2565 private:
2566 static lexer_diagnostic_sink *s_singleton;
2569 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2571 /* Constructor. Override line_table with a new instance based on CASE_,
2572 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2573 start parsing the tempfile. */
2575 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2576 lexer_test_options *options)
2577 : m_ltt (case_),
2578 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2579 /* Create a tempfile and write the text to it. */
2580 m_tempfile (SELFTEST_LOCATION, ".c", content),
2581 m_concats (),
2582 m_implicitly_expect_EOF (true)
2584 if (options)
2585 options->apply (*this);
2587 cpp_init_iconv (m_parser);
2589 /* Parse the file. */
2590 const char *fname = cpp_read_main_file (m_parser,
2591 m_tempfile.get_filename ());
2592 ASSERT_NE (fname, NULL);
2595 /* Destructor. By default, verify that the next token in m_parser is EOF. */
2597 lexer_test::~lexer_test ()
2599 location_t loc;
2600 const cpp_token *tok;
2602 if (m_implicitly_expect_EOF)
2604 tok = cpp_get_token_with_location (m_parser, &loc);
2605 ASSERT_NE (tok, NULL);
2606 ASSERT_EQ (tok->type, CPP_EOF);
2610 /* Get the next token from m_parser. */
2612 const cpp_token *
2613 lexer_test::get_token ()
2615 location_t loc;
2616 const cpp_token *tok;
2618 tok = cpp_get_token_with_location (m_parser, &loc);
2619 ASSERT_NE (tok, NULL);
2620 return tok;
2623 /* Verify that locations within string literals are correctly handled. */
2625 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2626 using the string concatenation database for TEST.
2628 Assert that the character at index IDX is on EXPECTED_LINE,
2629 and that it begins at column EXPECTED_START_COL and ends at
2630 EXPECTED_FINISH_COL (unless the locations are beyond
2631 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2632 columns). */
2634 static void
2635 assert_char_at_range (const location &loc,
2636 lexer_test& test,
2637 location_t strloc, enum cpp_ttype type, int idx,
2638 int expected_line, int expected_start_col,
2639 int expected_finish_col)
2641 cpp_reader *pfile = test.m_parser;
2642 string_concat_db *concats = &test.m_concats;
2644 source_range actual_range = source_range();
2645 const char *err
2646 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2647 &actual_range);
2648 if (should_have_column_data_p (strloc))
2649 ASSERT_EQ_AT (loc, NULL, err);
2650 else
2652 ASSERT_STREQ_AT (loc,
2653 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2654 err);
2655 return;
2658 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2659 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2660 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2661 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2663 if (should_have_column_data_p (actual_range.m_start))
2665 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2666 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2668 if (should_have_column_data_p (actual_range.m_finish))
2670 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2671 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
/* Call assert_char_at_range, reporting any failure at the point where
   this macro is used (SELFTEST_LOCATION).  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX,             \
                             EXPECTED_LINE, EXPECTED_START_COL,         \
                             EXPECTED_FINISH_COL)                       \
  assert_char_at_range (SELFTEST_LOCATION,                              \
                        (LEXER_TEST), (STRLOC), (TYPE), (IDX),          \
                        (EXPECTED_LINE),                                \
                        (EXPECTED_START_COL), (EXPECTED_FINISH_COL))
2684 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2685 using the string concatenation database for TEST.
2687 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2689 static void
2690 assert_num_substring_ranges (const location &loc,
2691 lexer_test& test,
2692 location_t strloc,
2693 enum cpp_ttype type,
2694 int expected_num_ranges)
2696 cpp_reader *pfile = test.m_parser;
2697 string_concat_db *concats = &test.m_concats;
2699 int actual_num_ranges = -1;
2700 const char *err
2701 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2702 &actual_num_ranges);
2703 if (should_have_column_data_p (strloc))
2704 ASSERT_EQ_AT (loc, NULL, err);
2705 else
2707 ASSERT_STREQ_AT (loc,
2708 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2709 err);
2710 return;
2712 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
/* Call assert_num_substring_ranges, reporting any failure at the point
   where this macro is used (SELFTEST_LOCATION).  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,   \
                                    EXPECTED_NUM_RANGES)        \
  assert_num_substring_ranges (SELFTEST_LOCATION,               \
                               (LEXER_TEST),                    \
                               (STRLOC),                        \
                               (TYPE),                          \
                               (EXPECTED_NUM_RANGES))
2724 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2725 returns an error (using the string concatenation database for TEST). */
2727 static void
2728 assert_has_no_substring_ranges (const location &loc,
2729 lexer_test& test,
2730 location_t strloc,
2731 enum cpp_ttype type,
2732 const char *expected_err)
2734 cpp_reader *pfile = test.m_parser;
2735 string_concat_db *concats = &test.m_concats;
2736 cpp_substring_ranges ranges;
2737 const char *actual_err
2738 = get_substring_ranges_for_loc (pfile, concats, strloc,
2739 type, ranges);
2740 if (should_have_column_data_p (strloc))
2741 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2742 else
2743 ASSERT_STREQ_AT (loc,
2744 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2745 actual_err);
/* Call assert_has_no_substring_ranges, reporting any failure at the
   point where this macro is used (SELFTEST_LOCATION).  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)   \
  assert_has_no_substring_ranges (SELFTEST_LOCATION,                    \
                                  (LEXER_TEST),                         \
                                  (STRLOC),                             \
                                  (TYPE),                               \
                                  (ERR))
2752 /* Lex a simple string literal. Verify the substring location data, before
2753 and after running cpp_interpret_string on it. */
2755 static void
2756 test_lexer_string_locations_simple (const line_table_case &case_)
2758 /* Digits 0-9 (with 0 at column 10), the simple way.
2759 ....................000000000.11111111112.2222222223333333333
2760 ....................123456789.01234567890.1234567890123456789
2761 We add a trailing comment to ensure that we correctly locate
2762 the end of the string literal token. */
2763 const char *content = " \"0123456789\" /* not a string */\n";
2764 lexer_test test (case_, content, NULL);
2766 /* Verify that we get the expected token back, with the correct
2767 location information. */
2768 const cpp_token *tok = test.get_token ();
2769 ASSERT_EQ (tok->type, CPP_STRING);
2770 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2771 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2773 /* At this point in lexing, the quote characters are treated as part of
2774 the string (they are stripped off by cpp_interpret_string). */
2776 ASSERT_EQ (tok->val.str.len, 12);
2778 /* Verify that cpp_interpret_string works. */
2779 cpp_string dst_string;
2780 const enum cpp_ttype type = CPP_STRING;
2781 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2782 &dst_string, type);
2783 ASSERT_TRUE (result);
2784 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2785 free (const_cast <unsigned char *> (dst_string.text));
2787 /* Verify ranges of individual characters. This no longer includes the
2788 opening quote, but does include the closing quote. */
2789 for (int i = 0; i <= 10; i++)
2790 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2791 10 + i, 10 + i);
2793 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2796 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2797 encoding. */
2799 static void
2800 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2802 /* EBCDIC support requires iconv. */
2803 if (!HAVE_ICONV)
2804 return;
2806 /* Digits 0-9 (with 0 at column 10), the simple way.
2807 ....................000000000.11111111112.2222222223333333333
2808 ....................123456789.01234567890.1234567890123456789
2809 We add a trailing comment to ensure that we correctly locate
2810 the end of the string literal token. */
2811 const char *content = " \"0123456789\" /* not a string */\n";
2812 ebcdic_execution_charset use_ebcdic;
2813 lexer_test test (case_, content, &use_ebcdic);
2815 /* Verify that we get the expected token back, with the correct
2816 location information. */
2817 const cpp_token *tok = test.get_token ();
2818 ASSERT_EQ (tok->type, CPP_STRING);
2819 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2820 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2822 /* At this point in lexing, the quote characters are treated as part of
2823 the string (they are stripped off by cpp_interpret_string). */
2825 ASSERT_EQ (tok->val.str.len, 12);
2827 /* The remainder of the test requires an iconv implementation that
2828 can convert from UTF-8 to the EBCDIC encoding requested above. */
2829 if (use_ebcdic.iconv_errors_occurred_p ())
2830 return;
2832 /* Verify that cpp_interpret_string works. */
2833 cpp_string dst_string;
2834 const enum cpp_ttype type = CPP_STRING;
2835 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2836 &dst_string, type);
2837 ASSERT_TRUE (result);
2838 /* We should now have EBCDIC-encoded text, specifically
2839 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2840 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2841 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2842 (const char *)dst_string.text);
2843 free (const_cast <unsigned char *> (dst_string.text));
2845 /* Verify that we don't attempt to record substring location information
2846 for such cases. */
2847 ASSERT_HAS_NO_SUBSTRING_RANGES
2848 (test, tok->src_loc, type,
2849 "execution character set != source character set");
2852 /* Lex a string literal containing a hex-escaped character.
2853 Verify the substring location data, before and after running
2854 cpp_interpret_string on it. */
2856 static void
2857 test_lexer_string_locations_hex (const line_table_case &case_)
2859 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2860 and with a space in place of digit 6, to terminate the escaped
2861 hex code.
2862 ....................000000000.111111.11112222.
2863 ....................123456789.012345.67890123. */
2864 const char *content = " \"01234\\x35 789\"\n";
2865 lexer_test test (case_, content, NULL);
2867 /* Verify that we get the expected token back, with the correct
2868 location information. */
2869 const cpp_token *tok = test.get_token ();
2870 ASSERT_EQ (tok->type, CPP_STRING);
2871 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2872 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2874 /* At this point in lexing, the quote characters are treated as part of
2875 the string (they are stripped off by cpp_interpret_string). */
2876 ASSERT_EQ (tok->val.str.len, 15);
2878 /* Verify that cpp_interpret_string works. */
2879 cpp_string dst_string;
2880 const enum cpp_ttype type = CPP_STRING;
2881 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2882 &dst_string, type);
2883 ASSERT_TRUE (result);
2884 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2885 free (const_cast <unsigned char *> (dst_string.text));
2887 /* Verify ranges of individual characters. This no longer includes the
2888 opening quote, but does include the closing quote. */
2889 for (int i = 0; i <= 4; i++)
2890 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2891 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2892 for (int i = 6; i <= 10; i++)
2893 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2895 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2898 /* Lex a string literal containing an octal-escaped character.
2899 Verify the substring location data after running cpp_interpret_string
2900 on it. */
2902 static void
2903 test_lexer_string_locations_oct (const line_table_case &case_)
2905 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2906 and with a space in place of digit 6, to terminate the escaped
2907 octal code.
2908 ....................000000000.111111.11112222.2222223333333333444
2909 ....................123456789.012345.67890123.4567890123456789012 */
2910 const char *content = " \"01234\\065 789\" /* not a string */\n";
2911 lexer_test test (case_, content, NULL);
2913 /* Verify that we get the expected token back, with the correct
2914 location information. */
2915 const cpp_token *tok = test.get_token ();
2916 ASSERT_EQ (tok->type, CPP_STRING);
2917 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2919 /* Verify that cpp_interpret_string works. */
2920 cpp_string dst_string;
2921 const enum cpp_ttype type = CPP_STRING;
2922 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2923 &dst_string, type);
2924 ASSERT_TRUE (result);
2925 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2926 free (const_cast <unsigned char *> (dst_string.text));
2928 /* Verify ranges of individual characters. This no longer includes the
2929 opening quote, but does include the closing quote. */
2930 for (int i = 0; i < 5; i++)
2931 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2932 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2933 for (int i = 6; i <= 10; i++)
2934 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2936 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2939 /* Test of string literal containing letter escapes. */
2941 static void
2942 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2944 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2945 .....................000000000.1.11111.1.1.11222.22222223333333
2946 .....................123456789.0.12345.6.7.89012.34567890123456. */
2947 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2948 lexer_test test (case_, content, NULL);
2950 /* Verify that we get the expected tokens back. */
2951 const cpp_token *tok = test.get_token ();
2952 ASSERT_EQ (tok->type, CPP_STRING);
2953 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2955 /* Verify ranges of individual characters. */
2956 /* "\t". */
2957 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2958 0, 1, 10, 11);
2959 /* "foo". */
2960 for (int i = 1; i <= 3; i++)
2961 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2962 i, 1, 11 + i, 11 + i);
2963 /* "\\" and "\n". */
2964 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2965 4, 1, 15, 16);
2966 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2967 5, 1, 17, 18);
2969 /* "bar" and closing quote for nul-terminator. */
2970 for (int i = 6; i <= 9; i++)
2971 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2972 i, 1, 13 + i, 13 + i);
2974 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2977 /* Another test of a string literal containing a letter escape.
2978 Based on string seen in
2979 printf ("%-%\n");
2980 in gcc.dg/format/c90-printf-1.c. */
2982 static void
2983 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2985 /* .....................000000000.1111.11.1111.22222222223.
2986 .....................123456789.0123.45.6789.01234567890. */
2987 const char *content = (" \"%-%\\n\" /* non-str */\n");
2988 lexer_test test (case_, content, NULL);
2990 /* Verify that we get the expected tokens back. */
2991 const cpp_token *tok = test.get_token ();
2992 ASSERT_EQ (tok->type, CPP_STRING);
2993 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2995 /* Verify ranges of individual characters. */
2996 /* "%-%". */
2997 for (int i = 0; i < 3; i++)
2998 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2999 i, 1, 10 + i, 10 + i);
3000 /* "\n". */
3001 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3002 3, 1, 13, 14);
3004 /* Closing quote for nul-terminator. */
3005 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3006 4, 1, 15, 15);
3008 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
3011 /* Lex a string literal containing UCN 4 characters.
3012 Verify the substring location data after running cpp_interpret_string
3013 on it. */
3015 static void
3016 test_lexer_string_locations_ucn4 (const line_table_case &case_)
3018 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
3019 as UCN 4.
3020 ....................000000000.111111.111122.222222223.33333333344444
3021 ....................123456789.012345.678901.234567890.12345678901234 */
3022 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
3023 lexer_test test (case_, content, NULL);
3025 /* Verify that we get the expected token back, with the correct
3026 location information. */
3027 const cpp_token *tok = test.get_token ();
3028 ASSERT_EQ (tok->type, CPP_STRING);
3029 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
3031 /* Verify that cpp_interpret_string works.
3032 The string should be encoded in the execution character
3033 set. Assuming that is UTF-8, we should have the following:
3034 ----------- ---- ----- ------- ----------------
3035 Byte offset Byte Octal Unicode Source Column(s)
3036 ----------- ---- ----- ------- ----------------
3037 0 0x30 '0' 10
3038 1 0x31 '1' 11
3039 2 0x32 '2' 12
3040 3 0x33 '3' 13
3041 4 0x34 '4' 14
3042 5 0xE2 \342 U+2174 15-20
3043 6 0x85 \205 (cont) 15-20
3044 7 0xB4 \264 (cont) 15-20
3045 8 0xE2 \342 U+2175 21-26
3046 9 0x85 \205 (cont) 21-26
3047 10 0xB5 \265 (cont) 21-26
3048 11 0x37 '7' 27
3049 12 0x38 '8' 28
3050 13 0x39 '9' 29
3051 14 0x00 30 (closing quote)
3052 ----------- ---- ----- ------- ---------------. */
3054 cpp_string dst_string;
3055 const enum cpp_ttype type = CPP_STRING;
3056 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3057 &dst_string, type);
3058 ASSERT_TRUE (result);
3059 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3060 (const char *)dst_string.text);
3061 free (const_cast <unsigned char *> (dst_string.text));
3063 /* Verify ranges of individual characters. This no longer includes the
3064 opening quote, but does include the closing quote.
3065 '01234'. */
3066 for (int i = 0; i <= 4; i++)
3067 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3068 /* U+2174. */
3069 for (int i = 5; i <= 7; i++)
3070 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
3071 /* U+2175. */
3072 for (int i = 8; i <= 10; i++)
3073 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
3074 /* '789' and nul terminator */
3075 for (int i = 11; i <= 14; i++)
3076 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
3078 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
3081 /* Lex a string literal containing UCN 8 characters.
3082 Verify the substring location data after running cpp_interpret_string
3083 on it. */
3085 static void
3086 test_lexer_string_locations_ucn8 (const line_table_case &case_)
3088 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
3089 ....................000000000.111111.1111222222.2222333333333.344444
3090 ....................123456789.012345.6789012345.6789012345678.901234 */
3091 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
3092 lexer_test test (case_, content, NULL);
3094 /* Verify that we get the expected token back, with the correct
3095 location information. */
3096 const cpp_token *tok = test.get_token ();
3097 ASSERT_EQ (tok->type, CPP_STRING);
3098 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
3099 "\"01234\\U00002174\\U00002175789\"");
3101 /* Verify that cpp_interpret_string works.
3102 The UTF-8 encoding of the string is identical to that from
3103 the ucn4 testcase above; the only difference is the column
3104 locations. */
3105 cpp_string dst_string;
3106 const enum cpp_ttype type = CPP_STRING;
3107 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3108 &dst_string, type);
3109 ASSERT_TRUE (result);
3110 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3111 (const char *)dst_string.text);
3112 free (const_cast <unsigned char *> (dst_string.text));
3114 /* Verify ranges of individual characters. This no longer includes the
3115 opening quote, but does include the closing quote.
3116 '01234'. */
3117 for (int i = 0; i <= 4; i++)
3118 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3119 /* U+2174. */
3120 for (int i = 5; i <= 7; i++)
3121 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
3122 /* U+2175. */
3123 for (int i = 8; i <= 10; i++)
3124 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
3125 /* '789' at columns 35-37 */
3126 for (int i = 11; i <= 13; i++)
3127 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
3128 /* Closing quote/nul-terminator at column 38. */
3129 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
3131 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
/* Fetch a big-endian 32-bit value and convert to host endianness.
   PTR_BE_VALUE points at four bytes stored most-significant first; they
   are read one byte at a time, so the result does not depend on the
   host's byte order.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *buf = (const unsigned char *)ptr_be_value;
  return (((uint32_t) buf[0] << 24)
          | ((uint32_t) buf[1] << 16)
          | ((uint32_t) buf[2] << 8)
          | (uint32_t) buf[3]);
}
3146 /* Lex a wide string literal and verify that attempts to read substring
3147 location data from it fail gracefully. */
3149 static void
3150 test_lexer_string_locations_wide_string (const line_table_case &case_)
3152 /* Digits 0-9.
3153 ....................000000000.11111111112.22222222233333
3154 ....................123456789.01234567890.12345678901234 */
3155 const char *content = " L\"0123456789\" /* non-str */\n";
3156 lexer_test test (case_, content, NULL);
3158 /* Verify that we get the expected token back, with the correct
3159 location information. */
3160 const cpp_token *tok = test.get_token ();
3161 ASSERT_EQ (tok->type, CPP_WSTRING);
3162 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
3164 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
3165 cpp_string dst_string;
3166 const enum cpp_ttype type = CPP_WSTRING;
3167 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3168 &dst_string, type);
3169 ASSERT_TRUE (result);
3170 /* The cpp_reader defaults to big-endian with
3171 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
3172 now be encoded as UTF-32BE. */
3173 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3174 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3175 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3176 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3177 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3178 free (const_cast <unsigned char *> (dst_string.text));
3180 /* We don't yet support generating substring location information
3181 for L"" strings. */
3182 ASSERT_HAS_NO_SUBSTRING_RANGES
3183 (test, tok->src_loc, type,
3184 "execution character set != source character set");
/* Fetch a big-endian 16-bit value and convert to host endianness.
   PTR_BE_VALUE points at two bytes stored most-significant first; they
   are read one byte at a time, so the result does not depend on the
   host's byte order.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *buf = (const unsigned char *)ptr_be_value;
  /* The operands promote to int; narrow back explicitly.  */
  return (uint16_t) (((uint16_t) buf[0] << 8) | (uint16_t) buf[1]);
}
3196 /* Lex a u"" string literal and verify that attempts to read substring
3197 location data from it fail gracefully. */
3199 static void
3200 test_lexer_string_locations_string16 (const line_table_case &case_)
3202 /* Digits 0-9.
3203 ....................000000000.11111111112.22222222233333
3204 ....................123456789.01234567890.12345678901234 */
3205 const char *content = " u\"0123456789\" /* non-str */\n";
3206 lexer_test test (case_, content, NULL);
3208 /* Verify that we get the expected token back, with the correct
3209 location information. */
3210 const cpp_token *tok = test.get_token ();
3211 ASSERT_EQ (tok->type, CPP_STRING16);
3212 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
3214 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
3215 cpp_string dst_string;
3216 const enum cpp_ttype type = CPP_STRING16;
3217 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3218 &dst_string, type);
3219 ASSERT_TRUE (result);
3221 /* The cpp_reader defaults to big-endian, so dst_string should
3222 now be encoded as UTF-16BE. */
3223 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
3224 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
3225 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
3226 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
3227 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
3228 free (const_cast <unsigned char *> (dst_string.text));
3230 /* We don't yet support generating substring location information
3231 for L"" strings. */
3232 ASSERT_HAS_NO_SUBSTRING_RANGES
3233 (test, tok->src_loc, type,
3234 "execution character set != source character set");
3237 /* Lex a U"" string literal and verify that attempts to read substring
3238 location data from it fail gracefully. */
3240 static void
3241 test_lexer_string_locations_string32 (const line_table_case &case_)
3243 /* Digits 0-9.
3244 ....................000000000.11111111112.22222222233333
3245 ....................123456789.01234567890.12345678901234 */
3246 const char *content = " U\"0123456789\" /* non-str */\n";
3247 lexer_test test (case_, content, NULL);
3249 /* Verify that we get the expected token back, with the correct
3250 location information. */
3251 const cpp_token *tok = test.get_token ();
3252 ASSERT_EQ (tok->type, CPP_STRING32);
3253 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
3255 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
3256 cpp_string dst_string;
3257 const enum cpp_ttype type = CPP_STRING32;
3258 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3259 &dst_string, type);
3260 ASSERT_TRUE (result);
3262 /* The cpp_reader defaults to big-endian, so dst_string should
3263 now be encoded as UTF-32BE. */
3264 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3265 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3266 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3267 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3268 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3269 free (const_cast <unsigned char *> (dst_string.text));
3271 /* We don't yet support generating substring location information
3272 for L"" strings. */
3273 ASSERT_HAS_NO_SUBSTRING_RANGES
3274 (test, tok->src_loc, type,
3275 "execution character set != source character set");
3278 /* Lex a u8-string literal.
3279 Verify the substring location data after running cpp_interpret_string
3280 on it. */
3282 static void
3283 test_lexer_string_locations_u8 (const line_table_case &case_)
3285 /* Digits 0-9.
3286 ....................000000000.11111111112.22222222233333
3287 ....................123456789.01234567890.12345678901234 */
3288 const char *content = " u8\"0123456789\" /* non-str */\n";
3289 lexer_test test (case_, content, NULL);
3291 /* Verify that we get the expected token back, with the correct
3292 location information. */
3293 const cpp_token *tok = test.get_token ();
3294 ASSERT_EQ (tok->type, CPP_UTF8STRING);
3295 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
3297 /* Verify that cpp_interpret_string works. */
3298 cpp_string dst_string;
3299 const enum cpp_ttype type = CPP_STRING;
3300 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3301 &dst_string, type);
3302 ASSERT_TRUE (result);
3303 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3304 free (const_cast <unsigned char *> (dst_string.text));
3306 /* Verify ranges of individual characters. This no longer includes the
3307 opening quote, but does include the closing quote. */
3308 for (int i = 0; i <= 10; i++)
3309 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3312 /* Lex a string literal containing UTF-8 source characters.
3313 Verify the substring location data after running cpp_interpret_string
3314 on it. */
3316 static void
3317 test_lexer_string_locations_utf8_source (const line_table_case &case_)
3319 /* This string literal is written out to the source file as UTF-8,
3320 and is of the form "before mojibake after", where "mojibake"
3321 is written as the following four unicode code points:
3322 U+6587 CJK UNIFIED IDEOGRAPH-6587
3323 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3324 U+5316 CJK UNIFIED IDEOGRAPH-5316
3325 U+3051 HIRAGANA LETTER KE.
3326 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3327 "before" and "after" are 1 byte per unicode character.
3329 The numbering shown are "columns", which are *byte* numbers within
3330 the line, rather than unicode character numbers.
3332 .................... 000000000.1111111.
3333 .................... 123456789.0123456. */
3334 const char *content = (" \"before "
3335 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3336 UTF-8: 0xE6 0x96 0x87
3337 C octal escaped UTF-8: \346\226\207
3338 "column" numbers: 17-19. */
3339 "\346\226\207"
3341 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3342 UTF-8: 0xE5 0xAD 0x97
3343 C octal escaped UTF-8: \345\255\227
3344 "column" numbers: 20-22. */
3345 "\345\255\227"
3347 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3348 UTF-8: 0xE5 0x8C 0x96
3349 C octal escaped UTF-8: \345\214\226
3350 "column" numbers: 23-25. */
3351 "\345\214\226"
3353 /* U+3051 HIRAGANA LETTER KE
3354 UTF-8: 0xE3 0x81 0x91
3355 C octal escaped UTF-8: \343\201\221
3356 "column" numbers: 26-28. */
3357 "\343\201\221"
3359 /* column numbers 29 onwards
3360 2333333.33334444444444
3361 9012345.67890123456789. */
3362 " after\" /* non-str */\n");
3363 lexer_test test (case_, content, NULL);
3365 /* Verify that we get the expected token back, with the correct
3366 location information. */
3367 const cpp_token *tok = test.get_token ();
3368 ASSERT_EQ (tok->type, CPP_STRING);
3369 ASSERT_TOKEN_AS_TEXT_EQ
3370 (test.m_parser, tok,
3371 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3373 /* Verify that cpp_interpret_string works. */
3374 cpp_string dst_string;
3375 const enum cpp_ttype type = CPP_STRING;
3376 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3377 &dst_string, type);
3378 ASSERT_TRUE (result);
3379 ASSERT_STREQ
3380 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3381 (const char *)dst_string.text);
3382 free (const_cast <unsigned char *> (dst_string.text));
3384 /* Verify ranges of individual characters. This no longer includes the
3385 opening quote, but does include the closing quote.
3386 Assuming that both source and execution encodings are UTF-8, we have
3387 a run of 25 octets in each, plus the NUL terminator. */
3388 for (int i = 0; i < 25; i++)
3389 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3390 /* NUL-terminator should use the closing quote at column 35. */
3391 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3393 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3396 /* Test of string literal concatenation. */
3398 static void
3399 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3401 /* Digits 0-9.
3402 .....................000000000.111111.11112222222222
3403 .....................123456789.012345.67890123456789. */
3404 const char *content = (" \"01234\" /* non-str */\n"
3405 " \"56789\" /* non-str */\n");
3406 lexer_test test (case_, content, NULL);
3408 location_t input_locs[2];
3410 /* Verify that we get the expected tokens back. */
3411 auto_vec <cpp_string> input_strings;
3412 const cpp_token *tok_a = test.get_token ();
3413 ASSERT_EQ (tok_a->type, CPP_STRING);
3414 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3415 input_strings.safe_push (tok_a->val.str);
3416 input_locs[0] = tok_a->src_loc;
3418 const cpp_token *tok_b = test.get_token ();
3419 ASSERT_EQ (tok_b->type, CPP_STRING);
3420 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3421 input_strings.safe_push (tok_b->val.str);
3422 input_locs[1] = tok_b->src_loc;
3424 /* Verify that cpp_interpret_string works. */
3425 cpp_string dst_string;
3426 const enum cpp_ttype type = CPP_STRING;
3427 bool result = cpp_interpret_string (test.m_parser,
3428 input_strings.address (), 2,
3429 &dst_string, type);
3430 ASSERT_TRUE (result);
3431 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3432 free (const_cast <unsigned char *> (dst_string.text));
3434 /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3435 test.m_concats.record_string_concatenation (2, input_locs);
3437 location_t initial_loc = input_locs[0];
3439 /* "01234" on line 1. */
3440 for (int i = 0; i <= 4; i++)
3441 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3442 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3443 for (int i = 5; i <= 10; i++)
3444 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3446 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3449 /* Another test of string literal concatenation. */
3451 static void
3452 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3454 /* Digits 0-9.
3455 .....................000000000.111.11111112222222
3456 .....................123456789.012.34567890123456. */
3457 const char *content = (" \"01\" /* non-str */\n"
3458 " \"23\" /* non-str */\n"
3459 " \"45\" /* non-str */\n"
3460 " \"67\" /* non-str */\n"
3461 " \"89\" /* non-str */\n");
3462 lexer_test test (case_, content, NULL);
3464 auto_vec <cpp_string> input_strings;
3465 location_t input_locs[5];
3467 /* Verify that we get the expected tokens back. */
3468 for (int i = 0; i < 5; i++)
3470 const cpp_token *tok = test.get_token ();
3471 ASSERT_EQ (tok->type, CPP_STRING);
3472 input_strings.safe_push (tok->val.str);
3473 input_locs[i] = tok->src_loc;
3476 /* Verify that cpp_interpret_string works. */
3477 cpp_string dst_string;
3478 const enum cpp_ttype type = CPP_STRING;
3479 bool result = cpp_interpret_string (test.m_parser,
3480 input_strings.address (), 5,
3481 &dst_string, type);
3482 ASSERT_TRUE (result);
3483 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3484 free (const_cast <unsigned char *> (dst_string.text));
3486 /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3487 test.m_concats.record_string_concatenation (5, input_locs);
3489 location_t initial_loc = input_locs[0];
3491 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3492 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3493 and expect get_source_range_for_substring to fail.
3494 However, for a string concatenation test, we can have a case
3495 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3496 but subsequent strings can be after it.
3497 Attempting to detect this within assert_char_at_range
3498 would overcomplicate the logic for the common test cases, so
3499 we detect it here. */
3500 if (should_have_column_data_p (input_locs[0])
3501 && !should_have_column_data_p (input_locs[4]))
3503 /* Verify that get_source_range_for_substring gracefully rejects
3504 this case. */
3505 source_range actual_range;
3506 const char *err
3507 = get_source_range_for_char (test.m_parser, &test.m_concats,
3508 initial_loc, type, 0, &actual_range);
3509 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3510 return;
3513 for (int i = 0; i < 5; i++)
3514 for (int j = 0; j < 2; j++)
3515 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3516 i + 1, 10 + j, 10 + j);
3518 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3519 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3521 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3524 /* Another test of string literal concatenation, this time combined with
3525 various kinds of escaped characters. */
3527 static void
3528 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3530 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3531 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3532 const char *content
3533 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3534 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3535 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3536 lexer_test test (case_, content, NULL);
3538 auto_vec <cpp_string> input_strings;
3539 location_t input_locs[4];
3541 /* Verify that we get the expected tokens back. */
3542 for (int i = 0; i < 4; i++)
3544 const cpp_token *tok = test.get_token ();
3545 ASSERT_EQ (tok->type, CPP_STRING);
3546 input_strings.safe_push (tok->val.str);
3547 input_locs[i] = tok->src_loc;
3550 /* Verify that cpp_interpret_string works. */
3551 cpp_string dst_string;
3552 const enum cpp_ttype type = CPP_STRING;
3553 bool result = cpp_interpret_string (test.m_parser,
3554 input_strings.address (), 4,
3555 &dst_string, type);
3556 ASSERT_TRUE (result);
3557 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3558 free (const_cast <unsigned char *> (dst_string.text));
3560 /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3561 test.m_concats.record_string_concatenation (4, input_locs);
3563 location_t initial_loc = input_locs[0];
3565 for (int i = 0; i <= 4; i++)
3566 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3567 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3568 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3569 for (int i = 7; i <= 9; i++)
3570 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3572 /* NUL-terminator should use the location of the final closing quote. */
3573 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3575 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3578 /* Test of string literal in a macro. */
3580 static void
3581 test_lexer_string_locations_macro (const line_table_case &case_)
3583 /* Digits 0-9.
3584 .....................0000000001111111111.22222222223.
3585 .....................1234567890123456789.01234567890. */
3586 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3587 " MACRO");
3588 lexer_test test (case_, content, NULL);
3590 /* Verify that we get the expected tokens back. */
3591 const cpp_token *tok = test.get_token ();
3592 ASSERT_EQ (tok->type, CPP_PADDING);
3594 tok = test.get_token ();
3595 ASSERT_EQ (tok->type, CPP_STRING);
3596 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3598 /* Verify ranges of individual characters. We ought to
3599 see columns within the macro definition. */
3600 for (int i = 0; i <= 10; i++)
3601 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3602 i, 1, 20 + i, 20 + i);
3604 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3606 tok = test.get_token ();
3607 ASSERT_EQ (tok->type, CPP_PADDING);
3610 /* Test of stringification of a macro argument. */
3612 static void
3613 test_lexer_string_locations_stringified_macro_argument
3614 (const line_table_case &case_)
3616 /* .....................000000000111111111122222222223.
3617 .....................123456789012345678901234567890. */
3618 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3619 "MACRO(foo)\n");
3620 lexer_test test (case_, content, NULL);
3622 /* Verify that we get the expected token back. */
3623 const cpp_token *tok = test.get_token ();
3624 ASSERT_EQ (tok->type, CPP_PADDING);
3626 tok = test.get_token ();
3627 ASSERT_EQ (tok->type, CPP_STRING);
3628 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3630 /* We don't support getting the location of a stringified macro
3631 argument. Verify that it fails gracefully. */
3632 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3633 "cpp_interpret_string_1 failed");
3635 tok = test.get_token ();
3636 ASSERT_EQ (tok->type, CPP_PADDING);
3638 tok = test.get_token ();
3639 ASSERT_EQ (tok->type, CPP_PADDING);
3642 /* Ensure that we are fail gracefully if something attempts to pass
3643 in a location that isn't a string literal token. Seen on this code:
3645 const char a[] = " %d ";
3646 __builtin_printf (a, 0.5);
3649 when c-format.cc erroneously used the indicated one-character
3650 location as the format string location, leading to a read past the
3651 end of a string buffer in cpp_interpret_string_1. */
3653 static void
3654 test_lexer_string_locations_non_string (const line_table_case &case_)
3656 /* .....................000000000111111111122222222223.
3657 .....................123456789012345678901234567890. */
3658 const char *content = (" a\n");
3659 lexer_test test (case_, content, NULL);
3661 /* Verify that we get the expected token back. */
3662 const cpp_token *tok = test.get_token ();
3663 ASSERT_EQ (tok->type, CPP_NAME);
3664 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3666 /* At this point, libcpp is attempting to interpret the name as a
3667 string literal, despite it not starting with a quote. We don't detect
3668 that, but we should at least fail gracefully. */
3669 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3670 "cpp_interpret_string_1 failed");
3673 /* Ensure that we can read substring information for a token which
3674 starts in one linemap and ends in another . Adapted from
3675 gcc.dg/cpp/pr69985.c. */
3677 static void
3678 test_lexer_string_locations_long_line (const line_table_case &case_)
3680 /* .....................000000.000111111111
3681 .....................123456.789012346789. */
3682 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3683 " \"0123456789012345678901234567890123456789"
3684 "0123456789012345678901234567890123456789"
3685 "0123456789012345678901234567890123456789"
3686 "0123456789\"\n");
3688 lexer_test test (case_, content, NULL);
3690 /* Verify that we get the expected token back. */
3691 const cpp_token *tok = test.get_token ();
3692 ASSERT_EQ (tok->type, CPP_STRING);
3694 if (!should_have_column_data_p (line_table->highest_location))
3695 return;
3697 /* Verify ranges of individual characters. */
3698 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3699 for (int i = 0; i < 131; i++)
3700 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3701 i, 2, 7 + i, 7 + i);
3704 /* Test of locations within a raw string that doesn't contain a newline. */
3706 static void
3707 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3709 /* .....................00.0000000111111111122.
3710 .....................12.3456789012345678901. */
3711 const char *content = ("R\"foo(0123456789)foo\"\n");
3712 lexer_test test (case_, content, NULL);
3714 /* Verify that we get the expected token back. */
3715 const cpp_token *tok = test.get_token ();
3716 ASSERT_EQ (tok->type, CPP_STRING);
3718 /* Verify that cpp_interpret_string works. */
3719 cpp_string dst_string;
3720 const enum cpp_ttype type = CPP_STRING;
3721 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3722 &dst_string, type);
3723 ASSERT_TRUE (result);
3724 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3725 free (const_cast <unsigned char *> (dst_string.text));
3727 if (!should_have_column_data_p (line_table->highest_location))
3728 return;
3730 /* 0-9, plus the nil terminator. */
3731 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3732 for (int i = 0; i < 11; i++)
3733 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3734 i, 1, 7 + i, 7 + i);
3737 /* Test of locations within a raw string that contains a newline. */
3739 static void
3740 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3742 /* .....................00.0000.
3743 .....................12.3456. */
3744 const char *content = ("R\"foo(\n"
3745 /* .....................00000.
3746 .....................12345. */
3747 "hello\n"
3748 "world\n"
3749 /* .....................00000.
3750 .....................12345. */
3751 ")foo\"\n");
3752 lexer_test test (case_, content, NULL);
3754 /* Verify that we get the expected token back. */
3755 const cpp_token *tok = test.get_token ();
3756 ASSERT_EQ (tok->type, CPP_STRING);
3758 /* Verify that cpp_interpret_string works. */
3759 cpp_string dst_string;
3760 const enum cpp_ttype type = CPP_STRING;
3761 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3762 &dst_string, type);
3763 ASSERT_TRUE (result);
3764 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3765 free (const_cast <unsigned char *> (dst_string.text));
3767 if (!should_have_column_data_p (line_table->highest_location))
3768 return;
3770 /* Currently we don't support locations within raw strings that
3771 contain newlines. */
3772 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3773 "range endpoints are on different lines");
3776 /* Test of parsing an unterminated raw string. */
3778 static void
3779 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3781 const char *content = "R\"ouch()ouCh\" /* etc */";
3783 lexer_diagnostic_sink diagnostics;
3784 lexer_test test (case_, content, &diagnostics);
3785 test.m_implicitly_expect_EOF = false;
3787 /* Attempt to parse the raw string. */
3788 const cpp_token *tok = test.get_token ();
3789 ASSERT_EQ (tok->type, CPP_EOF);
3791 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3792 /* We expect the message "unterminated raw string"
3793 in the "cpplib" translation domain.
3794 It's not clear that dgettext is available on all supported hosts,
3795 so this assertion is commented-out for now.
3796 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3797 diagnostics.m_diagnostics[0]);
3801 /* Test of lexing char constants. */
3803 static void
3804 test_lexer_char_constants (const line_table_case &case_)
3806 /* Various char constants.
3807 .....................0000000001111111111.22222222223.
3808 .....................1234567890123456789.01234567890. */
3809 const char *content = (" 'a'\n"
3810 " u'a'\n"
3811 " U'a'\n"
3812 " L'a'\n"
3813 " 'abc'\n");
3814 lexer_test test (case_, content, NULL);
3816 /* Verify that we get the expected tokens back. */
3817 /* 'a'. */
3818 const cpp_token *tok = test.get_token ();
3819 ASSERT_EQ (tok->type, CPP_CHAR);
3820 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3822 unsigned int chars_seen;
3823 int unsignedp;
3824 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3825 &chars_seen, &unsignedp);
3826 ASSERT_EQ (cc, 'a');
3827 ASSERT_EQ (chars_seen, 1);
3829 /* u'a'. */
3830 tok = test.get_token ();
3831 ASSERT_EQ (tok->type, CPP_CHAR16);
3832 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3834 /* U'a'. */
3835 tok = test.get_token ();
3836 ASSERT_EQ (tok->type, CPP_CHAR32);
3837 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3839 /* L'a'. */
3840 tok = test.get_token ();
3841 ASSERT_EQ (tok->type, CPP_WCHAR);
3842 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3844 /* 'abc' (c-char-sequence). */
3845 tok = test.get_token ();
3846 ASSERT_EQ (tok->type, CPP_CHAR);
3847 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3849 /* A table of interesting location_t values, giving one axis of our test
3850 matrix. */
3852 static const location_t boundary_locations[] = {
3853 /* Zero means "don't override the default values for a new line_table". */
3856 /* An arbitrary non-zero value that isn't close to one of
3857 the boundary values below. */
3858 0x10000,
3860 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3861 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3862 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3863 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3864 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3865 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3867 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3868 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3869 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3870 LINE_MAP_MAX_LOCATION_WITH_COLS,
3871 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3872 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3875 /* Run TESTCASE multiple times, once for each case in our test matrix. */
3877 void
3878 for_each_line_table_case (void (*testcase) (const line_table_case &))
3880 /* As noted above in the description of struct line_table_case,
3881 we want to explore a test matrix of interesting line_table
3882 situations, running various selftests for each case within the
3883 matrix. */
3885 /* Run all tests with:
3886 (a) line_table->default_range_bits == 0, and
3887 (b) line_table->default_range_bits == 5. */
3888 int num_cases_tested = 0;
3889 for (int default_range_bits = 0; default_range_bits <= 5;
3890 default_range_bits += 5)
3892 /* ...and use each of the "interesting" location values as
3893 the starting location within line_table. */
3894 const int num_boundary_locations = ARRAY_SIZE (boundary_locations);
3895 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3897 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3899 testcase (c);
3901 num_cases_tested++;
3905 /* Verify that we fully covered the test matrix. */
3906 ASSERT_EQ (num_cases_tested, 2 * 12);
3909 /* Verify that when presented with a consecutive pair of locations with
3910 a very large line offset, we don't attempt to consolidate them into
3911 a single ordinary linemap where the line offsets within the line map
3912 would lead to overflow (PR lto/88147). */
3914 static void
3915 test_line_offset_overflow ()
3917 line_table_test ltt (line_table_case (5, 0));
3919 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3920 linemap_line_start (line_table, 1, 100);
3921 location_t loc_a = linemap_line_start (line_table, 2578, 255);
3922 assert_loceq ("foo.c", 2578, 0, loc_a);
3924 const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3925 ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3926 ASSERT_EQ (ordmap_a->m_range_bits, 5);
3928 location_t loc_b = linemap_line_start (line_table, 404198, 512);
3929 assert_loceq ("foo.c", 404198, 0, loc_b);
3931 /* We should have started a new linemap, rather than attempting to store
3932 a very large line offset. */
3933 const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3934 ASSERT_NE (ordmap_a, ordmap_b);
3937 void test_cpp_utf8 ()
3939 const int def_tabstop = 8;
3940 cpp_char_column_policy policy (def_tabstop, cpp_wcwidth);
3942 /* Verify that wcwidth of invalid UTF-8 or control bytes is 1. */
3944 int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, policy);
3945 ASSERT_EQ (8, w_bad);
3946 int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, policy);
3947 ASSERT_EQ (5, w_ctrl);
3950 /* Verify that wcwidth of valid UTF-8 is as expected. */
3952 const int w_pi = cpp_display_width ("\xcf\x80", 2, policy);
3953 ASSERT_EQ (1, w_pi);
3954 const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, policy);
3955 ASSERT_EQ (2, w_emoji);
3956 const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
3957 policy);
3958 ASSERT_EQ (1, w_umlaut_precomposed);
3959 const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
3960 policy);
3961 ASSERT_EQ (1, w_umlaut_combining);
3962 const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, policy);
3963 ASSERT_EQ (2, w_han);
3964 const int w_ascii = cpp_display_width ("GCC", 3, policy);
3965 ASSERT_EQ (3, w_ascii);
3966 const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
3967 "\x9f! \xe4\xb8\xba y\xcc\x88",
3968 24, policy);
3969 ASSERT_EQ (18, w_mixed);
3972 /* Verify that display width properly expands tabs. */
3974 const char *tstr = "\tabc\td";
3975 ASSERT_EQ (6, cpp_display_width (tstr, 6,
3976 cpp_char_column_policy (1, cpp_wcwidth)));
3977 ASSERT_EQ (10, cpp_display_width (tstr, 6,
3978 cpp_char_column_policy (3, cpp_wcwidth)));
3979 ASSERT_EQ (17, cpp_display_width (tstr, 6,
3980 cpp_char_column_policy (8, cpp_wcwidth)));
3981 ASSERT_EQ (1,
3982 cpp_display_column_to_byte_column
3983 (tstr, 6, 7, cpp_char_column_policy (8, cpp_wcwidth)));
3986 /* Verify that cpp_byte_column_to_display_column can go past the end,
3987 and similar edge cases. */
3989 const char *str
3990 /* Display columns.
3991 111111112345 */
3992 = "\xcf\x80 abc";
3993 /* 111122223456
3994 Byte columns. */
3996 ASSERT_EQ (5, cpp_display_width (str, 6, policy));
3997 ASSERT_EQ (105,
3998 cpp_byte_column_to_display_column (str, 6, 106, policy));
3999 ASSERT_EQ (10000,
4000 cpp_byte_column_to_display_column (NULL, 0, 10000, policy));
4001 ASSERT_EQ (0,
4002 cpp_byte_column_to_display_column (NULL, 10000, 0, policy));
4005 /* Verify that cpp_display_column_to_byte_column can go past the end,
4006 and similar edge cases, and check invertibility. */
4008 const char *str
4009 /* Display columns.
4010 000000000000000000000000000000000000011
4011 111111112222222234444444455555555678901 */
4012 = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
4013 /* 000000000000000000000000000000000111111
4014 111122223333444456666777788889999012345
4015 Byte columns. */
4016 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, policy));
4017 ASSERT_EQ (15,
4018 cpp_display_column_to_byte_column (str, 15, 11, policy));
4019 ASSERT_EQ (115,
4020 cpp_display_column_to_byte_column (str, 15, 111, policy));
4021 ASSERT_EQ (10000,
4022 cpp_display_column_to_byte_column (NULL, 0, 10000, policy));
4023 ASSERT_EQ (0,
4024 cpp_display_column_to_byte_column (NULL, 10000, 0, policy));
4026 /* Verify that we do not interrupt a UTF-8 sequence. */
4027 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, policy));
4029 for (int byte_col = 1; byte_col <= 15; ++byte_col)
4031 const int disp_col
4032 = cpp_byte_column_to_display_column (str, 15, byte_col, policy);
4033 const int byte_col2
4034 = cpp_display_column_to_byte_column (str, 15, disp_col, policy);
4036 /* If we ask for the display column in the middle of a UTF-8
4037 sequence, it will return the length of the partial sequence,
4038 matching the behavior of GCC before display column support.
4039 Otherwise check the round trip was successful. */
4040 if (byte_col < 4)
4041 ASSERT_EQ (byte_col, disp_col);
4042 else if (byte_col >= 6 && byte_col < 9)
4043 ASSERT_EQ (3 + (byte_col - 5), disp_col);
4044 else
4045 ASSERT_EQ (byte_col2, byte_col);
4051 /* Run all of the selftests within this file. */
4053 void
4054 input_cc_tests ()
4056 test_linenum_comparisons ();
4057 test_should_have_column_data_p ();
4058 test_unknown_location ();
4059 test_builtins ();
4060 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
4062 for_each_line_table_case (test_accessing_ordinary_linemaps);
4063 for_each_line_table_case (test_lexer);
4064 for_each_line_table_case (test_lexer_string_locations_simple);
4065 for_each_line_table_case (test_lexer_string_locations_ebcdic);
4066 for_each_line_table_case (test_lexer_string_locations_hex);
4067 for_each_line_table_case (test_lexer_string_locations_oct);
4068 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
4069 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
4070 for_each_line_table_case (test_lexer_string_locations_ucn4);
4071 for_each_line_table_case (test_lexer_string_locations_ucn8);
4072 for_each_line_table_case (test_lexer_string_locations_wide_string);
4073 for_each_line_table_case (test_lexer_string_locations_string16);
4074 for_each_line_table_case (test_lexer_string_locations_string32);
4075 for_each_line_table_case (test_lexer_string_locations_u8);
4076 for_each_line_table_case (test_lexer_string_locations_utf8_source);
4077 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
4078 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
4079 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
4080 for_each_line_table_case (test_lexer_string_locations_macro);
4081 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
4082 for_each_line_table_case (test_lexer_string_locations_non_string);
4083 for_each_line_table_case (test_lexer_string_locations_long_line);
4084 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
4085 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
4086 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
4087 for_each_line_table_case (test_lexer_char_constants);
4089 test_reading_source_line ();
4091 test_line_offset_overflow ();
4093 test_cpp_utf8 ();
4096 } // namespace selftest
4098 #endif /* CHECKING_P */