1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
27 /* This is a cache used by get_next_line to store the content of a
28 file to be searched for file lines. */
31 /* This is the information used to store a line boundary. */
34 /* The line number. It starts from 1. */
37 /* The position (byte count) of the beginning of the line,
38 relative to the file data pointer. This starts at zero. */
41 /* The position (byte count) of the last byte of the line. This
42 normally points to the '\n' character, or to one byte after the
43 last byte of the file, if the file doesn't contain a '\n'
47 line_info (size_t l
, size_t s
, size_t e
)
48 : line_num (l
), start_pos (s
), end_pos (e
)
52 :line_num (0), start_pos (0), end_pos (0)
56 /* The number of times this file has been accessed. This is used
57 to designate which file cache to evict from the cache
61 const char *file_path
;
65 /* This points to the content of the file that we've read so
69 /* The size of the DATA array above.*/
72 /* The number of bytes read from the underlying file so far. This
73 must be less than or equal to SIZE above. */
76 /* The index of the beginning of the current line. */
77 size_t line_start_idx
;
79 /* The number of the previous line read. This starts at 1. Zero
80 means we've read no line so far. */
83 /* This is the total number of lines of the current file. At the
84 moment, we try to get this information from the line map
85 subsystem. Note that this is just a hint. When using the C++
86 front-end, this hint is correct because the input file is then
87 completely tokenized before parsing starts; so the line map knows
88 the number of lines before compilation really starts. For, e.g.,
89 the C front-end, it can happen that we start emitting diagnostics
90 before the line map has seen the end of the file. */
93 /* This is a record of the beginning and end of the lines we've seen
94 while reading the file. This is useful to avoid walking the data
95 from the beginning when we are asked to read a line that is
96 before LINE_START_IDX above. Note that the maximum size of this
97 record is fcache_line_record_size, so that the memory consumption
98 doesn't explode. We thus scale total_lines down to
99 fcache_line_record_size. */
100 vec
<line_info
, va_heap
> line_record
;
106 /* Current position in real source file. */
108 location_t input_location
;
110 struct line_maps
*line_table
;
/* Table of per-file caches.  It is allocated with fcache_tab_size
   entries by diagnostic_file_cache_init when it is still NULL.  */
112 static fcache
*fcache_tab
;
/* Number of entries in fcache_tab.  */
113 static const size_t fcache_tab_size
= 16;
/* Size, in bytes, that maybe_grow gives to a cache's data buffer when
   its current size is zero; otherwise the current size is doubled.  */
114 static const size_t fcache_buffer_size
= 4 * 1024;
/* Upper bound on the number of entries kept in a cache's line_record.  */
115 static const size_t fcache_line_record_size
= 100;
117 /* Expand the source location LOC into a human readable location. If
118 LOC resolves to a builtin location, the file name of the readable
119 location is set to the string "<built-in>". If EXPANSION_POINT_P is
120 TRUE and LOC is virtual, then it is resolved to the expansion
121 point of the involved macro. Otherwise, it is resolved to the
122 spelling location of the token.
124 When resolving to the spelling location of the token, if the
125 resulting location is for a built-in location (that is, it has no
126 associated line/column) in the context of a macro expansion, the
127 returned location is the first one (while unwinding the macro
128 location towards its expansion point) that is in real source
131 static expanded_location
132 expand_location_1 (source_location loc
,
133 bool expansion_point_p
)
135 expanded_location xloc
;
136 const struct line_map
*map
;
137 enum location_resolution_kind lrk
= LRK_MACRO_EXPANSION_POINT
;
140 if (IS_ADHOC_LOC (loc
))
142 block
= LOCATION_BLOCK (loc
);
143 loc
= LOCATION_LOCUS (loc
);
146 memset (&xloc
, 0, sizeof (xloc
));
148 if (loc
>= RESERVED_LOCATION_COUNT
)
150 if (!expansion_point_p
)
152 /* We want to resolve LOC to its spelling location.
154 But if that spelling location is a reserved location that
155 appears in the context of a macro expansion (like for a
156 location for a built-in token), let's consider the first
157 location (toward the expansion point) that is not reserved;
158 that is, the first location that is in real source code. */
159 loc
= linemap_unwind_to_first_non_reserved_loc (line_table
,
161 lrk
= LRK_SPELLING_LOCATION
;
163 loc
= linemap_resolve_location (line_table
, loc
,
165 xloc
= linemap_expand_location (line_table
, map
, loc
);
169 if (loc
<= BUILTINS_LOCATION
)
170 xloc
.file
= loc
== UNKNOWN_LOCATION
? NULL
: _("<built-in>");
175 /* Initialize the set of cache used for files accessed by caret
179 diagnostic_file_cache_init (void)
181 if (fcache_tab
== NULL
)
182 fcache_tab
= new fcache
[fcache_tab_size
];
185 /* Free the resources used by the set of cache used for files accessed
186 by caret diagnostic. */
189 diagnostic_file_cache_fini (void)
193 delete [] (fcache_tab
);
198 /* Return the total number of lines that have been read so far by the
199 line map (in the preprocessor). For languages like C++ that
200 entirely preprocess the input file before starting to parse, this
201 equals the actual number of lines of the file. */
204 total_lines_num (const char *file_path
)
207 source_location l
= 0;
208 if (linemap_get_file_highest_location (line_table
, file_path
, &l
))
210 gcc_assert (l
>= RESERVED_LOCATION_COUNT
);
211 expanded_location xloc
= expand_location (l
);
217 /* Lookup the cache used for the content of a given file accessed by
218 caret diagnostic. Return the found cached file, or NULL if no
219 cached file was found. */
222 lookup_file_in_cache_tab (const char *file_path
)
224 if (file_path
== NULL
)
227 diagnostic_file_cache_init ();
229 /* This will contain the found cached file. */
231 for (unsigned i
= 0; i
< fcache_tab_size
; ++i
)
233 fcache
*c
= &fcache_tab
[i
];
234 if (c
->file_path
&& !strcmp (c
->file_path
, file_path
))
247 /* Return the file cache with the lowest use count, or the
248 first empty one. If HIGHEST_USE_COUNT is non-null,
249 *HIGHEST_USE_COUNT is set to the highest use count of the entries
250 in the cache table. */
253 evicted_cache_tab_entry (unsigned *highest_use_count
)
255 diagnostic_file_cache_init ();
257 fcache
*to_evict
= &fcache_tab
[0];
258 unsigned huc
= to_evict
->use_count
;
259 for (unsigned i
= 1; i
< fcache_tab_size
; ++i
)
261 fcache
*c
= &fcache_tab
[i
];
262 bool c_is_empty
= (c
->file_path
== NULL
);
264 if (c
->use_count
< to_evict
->use_count
265 || (to_evict
->file_path
&& c_is_empty
))
266 /* We evict C because it's either an entry with a lower use
267 count or one that is empty. */
270 if (huc
< c
->use_count
)
274 /* We've reached the end of the cache; subsequent elements are
279 if (highest_use_count
)
280 *highest_use_count
= huc
;
285 /* Create the cache used for the content of a given file to be
286 accessed by caret diagnostic. This cache is added to an array of
287 cache and can be retrieved by lookup_file_in_cache_tab. This
288 function returns the created cache. Note that only the last
289 fcache_tab_size files are cached. */
292 add_file_to_cache_tab (const char *file_path
)
295 FILE *fp
= fopen (file_path
, "r");
299 unsigned highest_use_count
= 0;
300 fcache
*r
= evicted_cache_tab_entry (&highest_use_count
);
301 r
->file_path
= file_path
;
306 r
->line_start_idx
= 0;
308 r
->line_record
.truncate (0);
309 /* Ensure that this cache entry doesn't get evicted next time
310 add_file_to_cache_tab is called. */
311 r
->use_count
= ++highest_use_count
;
312 r
->total_lines
= total_lines_num (file_path
);
317 /* Lookup the cache used for the content of a given file accessed by
318 caret diagnostic. If no cached file was found, create a new cache
319 for this file, add it to the array of cached file and return
323 lookup_or_add_file_to_cache_tab (const char *file_path
)
325 fcache
*r
= lookup_file_in_cache_tab (file_path
);
327 r
= add_file_to_cache_tab (file_path
);
331 /* Default constructor for a cache of file used by caret
335 : use_count (0), file_path (NULL
), fp (NULL
), data (0),
336 size (0), nb_read (0), line_start_idx (0), line_num (0),
339 line_record
.create (0);
342 /* Destructor for a cache of file used by caret diagnostic. */
356 line_record
.release ();
359 /* Returns TRUE iff the cache would need to be filled with data coming
360 from the file. That is, either the cache is empty or full or the
361 current line is empty. Note that if the cache is full, it would
362 need to be extended and filled again. */
365 needs_read (fcache
*c
)
367 return (c
->nb_read
== 0
368 || c
->nb_read
== c
->size
369 || (c
->line_start_idx
>= c
->nb_read
- 1));
372 /* Return TRUE iff the cache is full and thus needs to be
376 needs_grow (fcache
*c
)
378 return c
->nb_read
== c
->size
;
381 /* Grow the cache if it needs to be extended. */
384 maybe_grow (fcache
*c
)
389 size_t size
= c
->size
== 0 ? fcache_buffer_size
: c
->size
* 2;
390 c
->data
= XRESIZEVEC (char, c
->data
, size
+ 1);
394 /* Read more data into the cache. Extends the cache if need be.
395 Returns TRUE iff new data could be read. */
398 read_data (fcache
*c
)
400 if (feof (c
->fp
) || ferror (c
->fp
))
405 char * from
= c
->data
+ c
->nb_read
;
406 size_t to_read
= c
->size
- c
->nb_read
;
407 size_t nb_read
= fread (from
, 1, to_read
, c
->fp
);
412 c
->nb_read
+= nb_read
;
416 /* Read new data iff the cache needs to be filled with more data
417 coming from the file FP. Return TRUE iff the cache was filled with
421 maybe_read_data (fcache
*c
)
425 return read_data (c
);
428 /* Read a new line from file FP, using C as a cache for the data
429 coming from the file. Upon successful completion, *LINE is set to
430 the beginning of the line found. Space for that line has been
431 allocated in the cache thus *LINE has the same life time as C.
432 *LINE_LEN is set to the length of the line. Note that the line
433 does not contain any terminal delimiter. This function returns
434 true if some data was read or processed from the cache, false
435 otherwise. Note that subsequent calls to get_next_line return the
436 next lines of the file and might overwrite the content of
440 get_next_line (fcache
*c
, char **line
, ssize_t
*line_len
)
442 /* Fill the cache with data to process. */
445 size_t remaining_size
= c
->nb_read
- c
->line_start_idx
;
446 if (remaining_size
== 0)
447 /* There is no more data to process. */
450 char *line_start
= c
->data
+ c
->line_start_idx
;
452 char *next_line_start
= NULL
;
454 char *line_end
= (char *) memchr (line_start
, '\n', remaining_size
);
455 if (line_end
== NULL
)
457 /* We haven't found the end-of-line delimiter in the cache.
458 Fill the cache with more data from the file and look for the
460 while (maybe_read_data (c
))
462 line_start
= c
->data
+ c
->line_start_idx
;
463 remaining_size
= c
->nb_read
- c
->line_start_idx
;
464 line_end
= (char *) memchr (line_start
, '\n', remaining_size
);
465 if (line_end
!= NULL
)
467 next_line_start
= line_end
+ 1;
471 if (line_end
== NULL
)
472 /* We've loaded all the file into the cache and still no
473 '\n'. Let's say the line ends up at one byte past the
474 end of the file. This is to stay consistent with the case
475 of when the line ends up with a '\n' and line_end points to
476 that terminal '\n'. That consistency is useful below in
477 the len calculation. */
478 line_end
= c
->data
+ c
->nb_read
;
481 next_line_start
= line_end
+ 1;
486 /* At this point, we've found the end of the line. It either
487 points to the '\n' or to one byte after the last byte of the
489 gcc_assert (line_end
!= NULL
);
491 len
= line_end
- line_start
;
493 if (c
->line_start_idx
< c
->nb_read
)
498 /* Before we update our line record, make sure the hint about the
499 total number of lines of the file is correct. If it's not, then
500 we give up recording line boundaries from now on. */
501 bool update_line_record
= true;
502 if (c
->line_num
> c
->total_lines
)
503 update_line_record
= false;
505 /* Now update our line record so that re-reading lines from
506 before c->line_start_idx is faster. */
507 if (update_line_record
508 && c
->line_record
.length () < fcache_line_record_size
)
510 /* If the file lines fits in the line record, we just record all
512 if (c
->total_lines
<= fcache_line_record_size
513 && c
->line_num
> c
->line_record
.length ())
514 c
->line_record
.safe_push (fcache::line_info (c
->line_num
,
516 line_end
- c
->data
));
517 else if (c
->total_lines
> fcache_line_record_size
)
519 /* ... otherwise, we just scale total_lines down to
520 fcache_line_record_size lines. */
521 size_t n
= (c
->line_num
* fcache_line_record_size
) / c
->total_lines
;
522 if (c
->line_record
.length () == 0
523 || n
>= c
->line_record
.length ())
524 c
->line_record
.safe_push (fcache::line_info (c
->line_num
,
526 line_end
- c
->data
));
530 /* Update c->line_start_idx so that it points to the next line to be
533 c
->line_start_idx
= next_line_start
- c
->data
;
535 /* We didn't find any terminal '\n'. Let's consider that the end
536 of line is the end of the data in the cache. The next
537 invocation of get_next_line will either read more data from the
538 underlying file or return false early because we've reached the
540 c
->line_start_idx
= c
->nb_read
;
547 /* Reads the next line from FILE into *LINE. If *LINE is too small
548 (or NULL) it is allocated (or extended) to have enough space to
549 contain the line. *LINE_LEN must contain the size of the
550 initial *LINE buffer. It's then updated by this function to the
551 actual length of the returned line. Note that the returned line
552 can contain several zero bytes. Also note that the returned string
553 is allocated in static storage that is going to be re-used by
554 subsequent invocations of read_next_line. */
557 read_next_line (fcache
*cache
, char ** line
, ssize_t
*line_len
)
562 if (!get_next_line (cache
, &l
, &len
))
566 *line
= XNEWVEC (char, len
);
569 *line
= XRESIZEVEC (char, *line
, len
);
571 memcpy (*line
, l
, len
);
577 /* Consume the next bytes coming from the cache (or from its
578 underlying file if there are remaining unread bytes in the file)
579 until we reach the next end-of-line (or end-of-file). There is no
580 copying from the cache involved. Return TRUE upon successful
584 goto_next_line (fcache
*cache
)
589 return get_next_line (cache
, &l
, &len
);
592 /* Read an arbitrary line number LINE_NUM from the file cached in C.
593 The line is copied into *LINE. *LINE_LEN must have been set to the
594 length of *LINE. If *LINE is too small (or NULL) it's extended (or
595 allocated) and *LINE_LEN is adjusted accordingly. *LINE ends up
596 with a terminal zero byte and can contain additional zero bytes.
597 This function returns true if a line was read. */
600 read_line_num (fcache
*c
, size_t line_num
,
601 char ** line
, ssize_t
*line_len
)
603 gcc_assert (line_num
> 0);
605 if (line_num
<= c
->line_num
)
607 /* We've been asked to read lines that are before c->line_num.
608 So let's use our line record (if it's not empty) to try to
609 avoid re-reading the file from the beginning again. */
611 if (c
->line_record
.is_empty ())
613 c
->line_start_idx
= 0;
618 fcache::line_info
*i
= NULL
;
619 if (c
->total_lines
<= fcache_line_record_size
)
621 /* In languages where the input file is not totally
622 preprocessed up front, the c->total_lines hint
623 can be smaller than the number of lines of the
624 file. In that case, only the first
625 c->total_lines have been recorded.
627 Otherwise, the first c->total_lines we've read have
628 their start/end recorded here. */
629 i
= (line_num
<= c
->total_lines
)
630 ? &c
->line_record
[line_num
- 1]
631 : &c
->line_record
[c
->total_lines
- 1];
632 gcc_assert (i
->line_num
<= line_num
);
636 /* So the file had more lines than our line record
637 size. Thus the number of lines we've recorded has
638 been scaled down to fcache_line_record_size. Let's
639 pick the start/end of the recorded line that is
640 closest to line_num. */
641 size_t n
= (line_num
<= c
->total_lines
)
642 ? line_num
* fcache_line_record_size
/ c
->total_lines
643 : c
->line_record
.length () - 1;
644 if (n
< c
->line_record
.length ())
646 i
= &c
->line_record
[n
];
647 gcc_assert (i
->line_num
<= line_num
);
651 if (i
&& i
->line_num
== line_num
)
653 /* We have the start/end of the line. Let's just copy
654 it again and we are done. */
655 ssize_t len
= i
->end_pos
- i
->start_pos
+ 1;
657 *line
= XRESIZEVEC (char, *line
, len
);
658 memmove (*line
, c
->data
+ i
->start_pos
, len
);
659 (*line
)[len
- 1] = '\0';
666 c
->line_start_idx
= i
->start_pos
;
667 c
->line_num
= i
->line_num
- 1;
671 c
->line_start_idx
= 0;
677 /* Let's walk from line c->line_num up to line_num - 1, without
679 while (c
->line_num
< line_num
- 1)
680 if (!goto_next_line (c
))
683 /* The line we want is the next one. Let's read and copy it back to
685 return read_next_line (c
, line
, line_len
);
688 /* Return the physical source line that corresponds to xloc in a
689 buffer that is statically allocated. The newline is replaced by
690 the null character. Note that the line can contain several null
691 characters, so LINE_LEN, if non-null, points to the actual length
695 location_get_source_line (expanded_location xloc
,
704 fcache
*c
= lookup_or_add_file_to_cache_tab (xloc
.file
);
708 bool read
= read_line_num (c
, xloc
.line
, &buffer
, &len
);
710 if (read
&& line_len
)
713 return read
? buffer
: NULL
;
716 /* Test if the location originates from the spelling location of a
717 built-in token. That is, return TRUE if LOC is a (possibly
718 virtual) location of a built-in token that appears in the expansion
719 list of a macro. Please note that this function also works on
720 tokens that result from built-in tokens. For instance, the
721 function would return true if passed a token "4" that is the result
722 of the expansion of the built-in __LINE__ macro. */
724 is_location_from_builtin_token (source_location loc
)
726 const line_map
*map
= NULL
;
727 loc
= linemap_resolve_location (line_table
, loc
,
728 LRK_SPELLING_LOCATION
, &map
);
729 return loc
== BUILTINS_LOCATION
;
732 /* Expand the source location LOC into a human readable location. If
733 LOC is virtual, it resolves to the expansion point of the involved
734 macro. If LOC resolves to a builtin location, the file name of the
735 readable location is set to the string "<built-in>". */
738 expand_location (source_location loc
)
740 return expand_location_1 (loc
, /*expansion_point_p=*/true);
743 /* Expand the source location LOC into a human readable location. If
744 LOC is virtual, it resolves to the spelling location of the
745 token. If LOC resolves to a builtin location, the file
746 name of the readable location is set to the string
750 expand_location_to_spelling_point (source_location loc
)
752 return expand_location_1 (loc
, /*expansion_piont_p=*/false);
755 /* If LOCATION is in a system header and if it's a virtual location for
756 a token coming from the expansion of a macro M, unwind it to the
757 location of the expansion point of M. Otherwise, just return
760 This is used for instance when we want to emit diagnostics about a
761 token that is located in a macro that is itself defined in a system
762 header -- e.g for the NULL macro. In that case, if LOCATION is
763 passed to diagnostics emitting functions like warning_at as is, no
764 diagnostic will be emitted. */
767 expansion_point_location_if_in_system_header (source_location location
)
769 if (in_system_header_at (location
))
770 location
= linemap_resolve_location (line_table
, location
,
771 LRK_MACRO_EXPANSION_POINT
,
777 #define ONE_M (ONE_K * ONE_K)
779 /* Display a number as an integer multiple of either:
780 - 1024, if said integer is >= 10 K (in base 2)
781 - 1024 * 1024, if said integer is >= 10 M (in base 2)
783 #define SCALE(x) ((unsigned long) ((x) < 10 * ONE_K \
785 : ((x) < 10 * ONE_M \
789 /* For a given integer, display either:
790 - the character 'k', if the number is at least 10 K (in base 2)
791 but strictly lower than 10 M (in base 2)
792 - the character 'M' if the number is at least 10 M (in base 2)
793 - the character ' ' if the number is strictly lower than 10 K */
794 #define STAT_LABEL(x) ((x) < 10 * ONE_K ? ' ' : ((x) < 10 * ONE_M ? 'k' : 'M'))
796 /* Display an integer amount as multiple of 1K or 1M (in base 2).
797 Display the correct unit (either k, M, or ' ') after the amount, as
799 #define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
801 /* Dump statistics to stderr about the memory usage of the line_table
802 set of line maps. This also displays some statistics about macro
806 dump_line_table_statistics (void)
808 struct linemap_stats s
;
809 long total_used_map_size
,
811 total_allocated_map_size
;
813 memset (&s
, 0, sizeof (s
));
815 linemap_get_statistics (line_table
, &s
);
817 macro_maps_size
= s
.macro_maps_used_size
818 + s
.macro_maps_locations_size
;
820 total_allocated_map_size
= s
.ordinary_maps_allocated_size
821 + s
.macro_maps_allocated_size
822 + s
.macro_maps_locations_size
;
824 total_used_map_size
= s
.ordinary_maps_used_size
825 + s
.macro_maps_used_size
826 + s
.macro_maps_locations_size
;
828 fprintf (stderr
, "Number of expanded macros: %5ld\n",
829 s
.num_expanded_macros
);
830 if (s
.num_expanded_macros
!= 0)
831 fprintf (stderr
, "Average number of tokens per macro expansion: %5ld\n",
832 s
.num_macro_tokens
/ s
.num_expanded_macros
);
834 "\nLine Table allocations during the "
835 "compilation process\n");
836 fprintf (stderr
, "Number of ordinary maps used: %5ld%c\n",
837 SCALE (s
.num_ordinary_maps_used
),
838 STAT_LABEL (s
.num_ordinary_maps_used
));
839 fprintf (stderr
, "Ordinary map used size: %5ld%c\n",
840 SCALE (s
.ordinary_maps_used_size
),
841 STAT_LABEL (s
.ordinary_maps_used_size
));
842 fprintf (stderr
, "Number of ordinary maps allocated: %5ld%c\n",
843 SCALE (s
.num_ordinary_maps_allocated
),
844 STAT_LABEL (s
.num_ordinary_maps_allocated
));
845 fprintf (stderr
, "Ordinary maps allocated size: %5ld%c\n",
846 SCALE (s
.ordinary_maps_allocated_size
),
847 STAT_LABEL (s
.ordinary_maps_allocated_size
));
848 fprintf (stderr
, "Number of macro maps used: %5ld%c\n",
849 SCALE (s
.num_macro_maps_used
),
850 STAT_LABEL (s
.num_macro_maps_used
));
851 fprintf (stderr
, "Macro maps used size: %5ld%c\n",
852 SCALE (s
.macro_maps_used_size
),
853 STAT_LABEL (s
.macro_maps_used_size
));
854 fprintf (stderr
, "Macro maps locations size: %5ld%c\n",
855 SCALE (s
.macro_maps_locations_size
),
856 STAT_LABEL (s
.macro_maps_locations_size
));
857 fprintf (stderr
, "Macro maps size: %5ld%c\n",
858 SCALE (macro_maps_size
),
859 STAT_LABEL (macro_maps_size
));
860 fprintf (stderr
, "Duplicated maps locations size: %5ld%c\n",
861 SCALE (s
.duplicated_macro_maps_locations_size
),
862 STAT_LABEL (s
.duplicated_macro_maps_locations_size
));
863 fprintf (stderr
, "Total allocated maps size: %5ld%c\n",
864 SCALE (total_allocated_map_size
),
865 STAT_LABEL (total_allocated_map_size
));
866 fprintf (stderr
, "Total used maps size: %5ld%c\n",
867 SCALE (total_used_map_size
),
868 STAT_LABEL (total_used_map_size
));
869 fprintf (stderr
, "\n");