1 /* Various declarations for language-independent pretty-print subroutines.
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Gabriel Dos Reis <gdr@integrable-solutions.net>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
25 #include "pretty-print.h"
26 #include "diagnostic-color.h"
32 /* Overwrite the given location/range within this text_info's rich_location.
33 For use e.g. when implementing "+" in client format decoders. */
36 text_info::set_location (unsigned int idx
, location_t loc
, bool show_caret_p
)
38 gcc_checking_assert (m_richloc
);
39 m_richloc
->set_range (line_table
, idx
, loc
, show_caret_p
);
43 text_info::get_location (unsigned int index_of_location
) const
45 gcc_checking_assert (m_richloc
);
47 if (index_of_location
== 0)
48 return m_richloc
->get_loc ();
50 return UNKNOWN_LOCATION
;
53 // Default construct an output buffer.
// Constructor: value-initializes FORMATTED_OBSTACK and points OBSTACK at it,
// then runs obstack_init on both FORMATTED_OBSTACK and CHUNK_OBSTACK.
// NOTE(review): the embedded listing numbers skip 57 and 59-64, so whatever
// those lines hold (presumably the remaining member initializers) is not
// visible here -- confirm against the full file.
55 output_buffer::output_buffer ()
56 : formatted_obstack (),
58 obstack (&formatted_obstack
),
65 obstack_init (&formatted_obstack
);
66 obstack_init (&chunk_obstack
);
69 // Release resources owned by an output buffer at the end of lifetime.
71 output_buffer::~output_buffer ()
73 obstack_free (&chunk_obstack
, NULL
);
74 obstack_free (&formatted_obstack
, NULL
);
78 /* Format an integer given by va_arg (ARG, type-specifier T) where
79 type-specifier is a precision modifier as indicated by PREC. F is
80 a string used to construct the appropriate format-specifier. */
// The three visible expansions print a va_arg of type T, long T and
// long long T, using the format "%" F with no, "l" or
// HOST_LONG_LONG_FORMAT length modifier respectively.
// NOTE(review): listing lines 82-85, 87-89, 91-93 and those after 94 are
// absent from this view (presumably the dispatch on PREC) -- confirm.
81 #define pp_integer_with_precision(PP, ARG, PREC, T, F) \
86 pp_scalar (PP, "%" F, va_arg (ARG, T)); \
90 pp_scalar (PP, "%l" F, va_arg (ARG, long T)); \
94 pp_scalar (PP, "%" HOST_LONG_LONG_FORMAT F, va_arg (ARG, long long T)); \
103 /* Subroutine of pp_set_maximum_length. Set up PRETTY-PRINTER's
104 internal maximum characters per line. */
106 pp_set_real_maximum_length (pretty_printer
*pp
)
108 /* If we're told not to wrap lines then do the obvious thing. In case
109 we'll emit prefix only once per message, it is appropriate
110 not to increase unnecessarily the line-length cut-off. */
111 if (!pp_is_wrapping_line (pp
)
112 || pp_prefixing_rule (pp
) == DIAGNOSTICS_SHOW_PREFIX_ONCE
113 || pp_prefixing_rule (pp
) == DIAGNOSTICS_SHOW_PREFIX_NEVER
)
114 pp
->maximum_length
= pp_line_cutoff (pp
);
117 int prefix_length
= pp
->prefix
? strlen (pp
->prefix
) : 0;
118 /* If the prefix is ridiculously too long, output at least
120 if (pp_line_cutoff (pp
) - prefix_length
< 32)
121 pp
->maximum_length
= pp_line_cutoff (pp
) + 32;
123 pp
->maximum_length
= pp_line_cutoff (pp
);
127 /* Clear PRETTY-PRINTER's output state. */
129 pp_clear_state (pretty_printer
*pp
)
131 pp
->emitted_prefix
= false;
132 pp_indentation (pp
) = 0;
135 /* Flush the formatted text of PRETTY-PRINTER onto the attached stream. */
137 pp_write_text_to_stream (pretty_printer
*pp
)
139 const char *text
= pp_formatted_text (pp
);
140 fputs (text
, pp_buffer (pp
)->stream
);
141 pp_clear_output_area (pp
);
144 /* As pp_write_text_to_stream, but for GraphViz label output.
146 Flush the formatted text of pretty-printer PP onto the attached stream.
147 Replace characters in that text that have special meaning in a GraphViz .dot file.
150 This routine is not very fast, but it doesn't have to be as it is only
151 used by routines dumping intermediate representations in graph form. */
// PP supplies the formatted text (TEXT, scanned through P) and the output
// stream FP; FOR_RECORD is copied into ESCAPE_CHAR for the characters that
// are only special in record-shape nodes.  The output area is cleared at
// the end.
// NOTE(review): the character loop and its switch are absent from this
// listing (the embedded numbers jump from 158 to 165, 171, 178, 181,
// 184-187 and 204), so the per-character handling is not visible here.
154 pp_write_text_as_dot_label_to_stream (pretty_printer
*pp
, bool for_record
)
156 const char *text
= pp_formatted_text (pp
);
157 const char *p
= text
;
158 FILE *fp
= pp_buffer (pp
)->stream
;
165 /* Print newlines as a left-aligned newline. */
171 /* The following characters are only special for record-shape nodes. */
178 escape_char
= for_record
;
181 /* The following characters always have to be escaped
182 for use in labels. */
184 /* There is a bug in some (f.i. 2.36.0) versions of graphviz
185 ( http://www.graphviz.org/mantisbt/view.php?id=2524 ) related to
186 backslash as last char in label. Let's avoid triggering it. */
// The assertion rejects a backslash that is followed by the terminating NUL.
187 gcc_assert (*(p
+ 1) != '\0');
204 pp_clear_output_area (pp
);
207 /* Wrap a text delimited by START and END into PRETTY-PRINTER. */
// Works on the half-open character range START..END.  WRAPPING_LINE caches
// pp_is_wrapping_line (PP); P scans forward from START over a run of
// characters that are neither blank nor a newline.
// NOTE(review): the enclosing loop and the statements guarded by the
// conditions below are absent from this listing (embedded numbers 212-214,
// 219-220, 222, 224-226, 228-231 and 233 onwards are missing).
209 pp_wrap_text (pretty_printer
*pp
, const char *start
, const char *end
)
211 bool wrapping_line
= pp_is_wrapping_line (pp
);
215 /* Dump anything bordered by whitespaces. */
217 const char *p
= start
;
218 while (p
!= end
&& !ISBLANK (*p
) && *p
!= '\n')
// Second half of a condition: the word START..P does not fit in what is
// left of the current line.
221 && p
- start
>= pp_remaining_character_count_for_line (pp
))
223 pp_append_text (pp
, start
, p
);
227 if (start
!= end
&& ISBLANK (*start
))
232 if (start
!= end
&& *start
== '\n')
240 /* Same as pp_wrap_text but wrap text only when in line-wrapping mode. */
242 pp_maybe_wrap_text (pretty_printer
*pp
, const char *start
, const char *end
)
244 if (pp_is_wrapping_line (pp
))
245 pp_wrap_text (pp
, start
, end
);
247 pp_append_text (pp
, start
, end
);
250 /* Append to the output area of PRETTY-PRINTER a string specified by its
251 STARTing character and LENGTH. */
253 pp_append_r (pretty_printer
*pp
, const char *start
, int length
)
255 output_buffer_append_r (pp_buffer (pp
), start
, length
);
258 /* Insert enough spaces into the output area of PRETTY-PRINTER to bring
259 the column position to the current indentation level, assuming that a
260 newline has just been written to the buffer. */
262 pp_indent (pretty_printer
*pp
)
264 int n
= pp_indentation (pp
);
267 for (i
= 0; i
< n
; ++i
)
271 /* The following format specifiers are recognized as being client independent:
272 %d, %i: (signed) integer in base ten.
273 %u: unsigned integer in base ten.
274 %o: unsigned integer in base eight.
275 %x: unsigned integer in base sixteen.
276 %ld, %li, %lo, %lu, %lx: long versions of the above.
277 %lld, %lli, %llo, %llu, %llx: long long versions.
278 %wd, %wi, %wo, %wu, %wx: HOST_WIDE_INT versions.
282 %r: if pp_show_color(pp), switch to color identified by const char *.
283 %R: if pp_show_color(pp), reset color.
284 %m: strerror(text->err_no) - does not consume a value from args_ptr.
288 %': apostrophe (should only be used in untranslated messages;
289 translations should use appropriate punctuation directly).
290 %.*s: a substring the length of which is specified by an argument
292 %Ns: likewise, but length specified as constant in the format string.
293 Flag 'q': quote formatted text (must come immediately after '%').
295 Arguments can be used sequentially, or through %N$ resp. *N$
296 notation Nth argument after the format string. If %N$ / *N$
297 notation is used, it must be used for all arguments, except %m, %%,
298 %<, %> and %', which may not have a number, as they do not consume
299 an argument. When %M$.*N$s is used, M must be N + 1. (This may
300 also be written %M$.*s, provided N is not otherwise used.) The
301 format string must have conversion specifiers with argument numbers
302 1 up to highest argument; each argument may only be used once.
303 A format string can have at most 30 arguments. */
305 /* Formatting phases 1 and 2: render TEXT->format_spec plus TEXT->args_ptr
306 into a series of chunks in pp_buffer (PP)->cur_chunk_array->args[].
307 Phase 3 is in pp_output_formatted_text. */
// Phases 1 and 2 of formatting: TEXT->format_spec is split into chunks that
// are stored in a newly allocated chunk_info pushed onto
// BUFFER->cur_chunk_array, and each format specifier chunk is then replaced
// by its formatted text.
// NOTE(review): many source lines are absent from this listing (the
// embedded numbers are not contiguous), so several declarations (e.g. of
// ARGS and P) and statements are not visible here.  Existing lines are left
// untouched.
310 pp_format (pretty_printer
*pp
, text_info
*text
)
312 output_buffer
*buffer
= pp_buffer (pp
);
315 struct chunk_info
*new_chunk_array
;
317 unsigned int curarg
= 0, chunk
= 0, argno
;
318 pp_wrapping_mode_t old_wrapping_mode
;
319 bool any_unnumbered
= false, any_numbered
= false;
// FORMATTERS[i] points at the ARGS slot holding the specifier for argument i.
320 const char **formatters
[PP_NL_ARGMAX
];
322 /* Allocate a new chunk structure. */
323 new_chunk_array
= XOBNEW (&buffer
->chunk_obstack
, struct chunk_info
);
324 new_chunk_array
->prev
= buffer
->cur_chunk_array
;
325 buffer
->cur_chunk_array
= new_chunk_array
;
326 args
= new_chunk_array
->args
;
328 /* Formatting phase 1: split up TEXT->format_spec into chunks in
329 pp_buffer (PP)->args[]. Even-numbered chunks are to be output
330 verbatim, odd-numbered chunks are format specifiers.
331 %m, %%, %<, %>, and %' are replaced with the appropriate text at
334 memset (formatters
, 0, sizeof formatters
);
336 for (p
= text
->format_spec
; *p
; )
338 while (*p
!= '\0' && *p
!= '%')
340 obstack_1grow (&buffer
->chunk_obstack
, *p
);
353 obstack_1grow (&buffer
->chunk_obstack
, '%');
359 obstack_grow (&buffer
->chunk_obstack
,
360 open_quote
, strlen (open_quote
));
362 = colorize_start (pp_show_color (pp
), "quote");
363 obstack_grow (&buffer
->chunk_obstack
, colorstr
, strlen (colorstr
));
370 const char *colorstr
= colorize_stop (pp_show_color (pp
));
371 obstack_grow (&buffer
->chunk_obstack
, colorstr
, strlen (colorstr
));
375 obstack_grow (&buffer
->chunk_obstack
,
376 close_quote
, strlen (close_quote
));
382 const char *colorstr
= colorize_stop (pp_show_color (pp
));
383 obstack_grow (&buffer
->chunk_obstack
, colorstr
,
391 const char *errstr
= xstrerror (text
->err_no
);
392 obstack_grow (&buffer
->chunk_obstack
, errstr
, strlen (errstr
));
398 /* Handled in phase 2. Terminate the plain chunk here. */
399 obstack_1grow (&buffer
->chunk_obstack
, '\0');
400 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
401 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
// The parsed argument number is converted to a zero-based index.
408 argno
= strtoul (p
, &end
, 10) - 1;
410 gcc_assert (*p
== '$');
414 gcc_assert (!any_unnumbered
);
419 any_unnumbered
= true;
420 gcc_assert (!any_numbered
);
// Each argument index may be claimed by only one specifier chunk.
422 gcc_assert (argno
< PP_NL_ARGMAX
);
423 gcc_assert (!formatters
[argno
]);
424 formatters
[argno
] = &args
[chunk
];
427 obstack_1grow (&buffer
->chunk_obstack
, *p
);
430 while (strchr ("qwl+#", p
[-1]));
434 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
435 (where M == N + 1). */
440 obstack_1grow (&buffer
->chunk_obstack
, *p
);
443 while (ISDIGIT (p
[-1]));
444 gcc_assert (p
[-1] == 's');
448 gcc_assert (*p
== '*');
449 obstack_1grow (&buffer
->chunk_obstack
, '*');
455 unsigned int argno2
= strtoul (p
, &end
, 10) - 1;
457 gcc_assert (argno2
== argno
- 1);
458 gcc_assert (!any_unnumbered
);
459 gcc_assert (*p
== '$');
462 formatters
[argno2
] = formatters
[argno
];
466 gcc_assert (!any_numbered
);
467 formatters
[argno
+1] = formatters
[argno
];
470 gcc_assert (*p
== 's');
471 obstack_1grow (&buffer
->chunk_obstack
, 's');
478 obstack_1grow (&buffer
->chunk_obstack
, '\0');
479 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
480 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
483 obstack_1grow (&buffer
->chunk_obstack
, '\0');
484 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
485 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
488 /* Set output to the argument obstack, and switch line-wrapping and
490 buffer
->obstack
= &buffer
->chunk_obstack
;
491 old_wrapping_mode
= pp_set_verbatim_wrapping (pp
);
493 /* Second phase. Replace each formatter with the formatted text it
496 for (argno
= 0; formatters
[argno
]; argno
++)
504 /* We do not attempt to enforce any ordering on the modifier
507 for (p
= *formatters
[argno
];; p
++)
532 /* We don't support precision beyond that of "long long". */
533 gcc_assert (precision
< 2);
540 gcc_assert (!wide
|| precision
== 0);
544 pp_string (pp
, open_quote
);
545 pp_string (pp
, colorize_start (pp_show_color (pp
), "quote"));
551 pp_string (pp
, colorize_start (pp_show_color (pp
),
552 va_arg (*text
->args_ptr
,
557 pp_character (pp
, va_arg (*text
->args_ptr
, int));
563 pp_wide_integer (pp
, va_arg (*text
->args_ptr
, HOST_WIDE_INT
));
565 pp_integer_with_precision
566 (pp
, *text
->args_ptr
, precision
, int, "d");
571 pp_scalar (pp
, "%" HOST_WIDE_INT_PRINT
"o",
572 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
574 pp_integer_with_precision
575 (pp
, *text
->args_ptr
, precision
, unsigned, "o");
579 pp_string (pp
, va_arg (*text
->args_ptr
, const char *));
583 pp_pointer (pp
, va_arg (*text
->args_ptr
, void *));
588 pp_scalar (pp
, HOST_WIDE_INT_PRINT_UNSIGNED
,
589 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
591 pp_integer_with_precision
592 (pp
, *text
->args_ptr
, precision
, unsigned, "u");
597 pp_scalar (pp
, HOST_WIDE_INT_PRINT_HEX
,
598 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
600 pp_integer_with_precision
601 (pp
, *text
->args_ptr
, precision
, unsigned, "x");
609 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
610 (where M == N + 1). The format string should be verified
611 already from the first phase. */
616 n
= strtoul (p
, &end
, 10);
618 gcc_assert (*p
== 's');
622 gcc_assert (*p
== '*');
624 gcc_assert (*p
== 's');
625 n
= va_arg (*text
->args_ptr
, int);
627 /* This consumes a second entry in the formatters array. */
628 gcc_assert (formatters
[argno
] == formatters
[argno
+1]);
// Only the first N bytes of the string argument are appended.
632 s
= va_arg (*text
->args_ptr
, const char *);
633 pp_append_text (pp
, s
, s
+ n
);
// Anything not handled above is delegated to the client format decoder.
641 gcc_assert (pp_format_decoder (pp
));
642 ok
= pp_format_decoder (pp
) (pp
, text
, p
,
643 precision
, wide
, plus
, hash
);
650 pp_string (pp
, colorize_stop (pp_show_color (pp
)));
651 pp_string (pp
, close_quote
);
// The formatted text replaces the specifier in the chunk slot it came from.
654 obstack_1grow (&buffer
->chunk_obstack
, '\0');
655 *formatters
[argno
] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
// Every formatter slot past the first empty one must be empty as well.
659 for (; argno
< PP_NL_ARGMAX
; argno
++)
660 gcc_assert (!formatters
[argno
]);
662 /* Revert to normal obstack and wrapping mode. */
663 buffer
->obstack
= &buffer
->formatted_obstack
;
664 buffer
->line_length
= 0;
665 pp_wrapping_mode (pp
) = old_wrapping_mode
;
669 /* Format of a message pointed to by TEXT. */
671 pp_output_formatted_text (pretty_printer
*pp
)
674 output_buffer
*buffer
= pp_buffer (pp
);
675 struct chunk_info
*chunk_array
= buffer
->cur_chunk_array
;
676 const char **args
= chunk_array
->args
;
678 gcc_assert (buffer
->obstack
== &buffer
->formatted_obstack
);
679 gcc_assert (buffer
->line_length
== 0);
681 /* This is a third phase, first 2 phases done in pp_format_args.
682 Now we actually print it. */
683 for (chunk
= 0; args
[chunk
]; chunk
++)
684 pp_string (pp
, args
[chunk
]);
686 /* Deallocate the chunk structure and everything after it (i.e. the
687 associated series of formatted strings). */
688 buffer
->cur_chunk_array
= chunk_array
->prev
;
689 obstack_free (&buffer
->chunk_obstack
, chunk_array
);
692 /* Helper subroutine of output_verbatim and verbatim. Do the appropriate
693 settings needed by BUFFER for a verbatim formatting. */
695 pp_format_verbatim (pretty_printer
*pp
, text_info
*text
)
697 /* Set verbatim mode. */
698 pp_wrapping_mode_t oldmode
= pp_set_verbatim_wrapping (pp
);
700 /* Do the actual formatting. */
701 pp_format (pp
, text
);
702 pp_output_formatted_text (pp
);
704 /* Restore previous settings. */
705 pp_wrapping_mode (pp
) = oldmode
;
708 /* Flush the content of BUFFER onto the attached stream. This
709 function does nothing unless pp->output_buffer->flush_p. */
// Writes PP's formatted text to the attached stream and fflushes it.
// NOTE(review): listing lines 713 and 715 are absent from this view, so the
// statement guarded by the flush_p test below (presumably an early return)
// and whatever precedes the test are not visible -- confirm.
711 pp_flush (pretty_printer
*pp
)
714 if (!pp
->buffer
->flush_p
)
716 pp_write_text_to_stream (pp
);
717 fflush (pp_buffer (pp
)->stream
);
720 /* Flush the content of BUFFER onto the attached stream independently
721 of the value of pp->output_buffer->flush_p. */
// Unconditional variant: no flush_p test appears before the write and the
// fflush of the attached stream.
// NOTE(review): listing line 725 is absent from this view -- confirm what
// runs before the write.
723 pp_really_flush (pretty_printer
*pp
)
726 pp_write_text_to_stream (pp
);
727 fflush (pp_buffer (pp
)->stream
);
730 /* Sets the number of maximum characters per line PRETTY-PRINTER can
731 output in line-wrapping mode. A LENGTH value 0 suppresses
734 pp_set_line_maximum_length (pretty_printer
*pp
, int length
)
736 pp_line_cutoff (pp
) = length
;
737 pp_set_real_maximum_length (pp
);
740 /* Clear PRETTY-PRINTER output area text info. */
742 pp_clear_output_area (pretty_printer
*pp
)
744 obstack_free (pp_buffer (pp
)->obstack
,
745 obstack_base (pp_buffer (pp
)->obstack
));
746 pp_buffer (pp
)->line_length
= 0;
749 /* Set PREFIX for PRETTY-PRINTER. */
751 pp_set_prefix (pretty_printer
*pp
, const char *prefix
)
754 pp_set_real_maximum_length (pp
);
755 pp
->emitted_prefix
= false;
756 pp_indentation (pp
) = 0;
759 /* Free PRETTY-PRINTER's prefix, a previously malloc()'d string. */
761 pp_destroy_prefix (pretty_printer
*pp
)
763 if (pp
->prefix
!= NULL
)
765 free (CONST_CAST (char *, pp
->prefix
));
770 /* Write out PRETTY-PRINTER's prefix. */
// Does nothing visible when PP has no prefix; otherwise dispatches on the
// prefixing rule.
// NOTE(review): listing lines 778, 780-781, 784-787, 789-790 and 796-800
// are absent from this view, so the break/fall-through structure between
// the cases and the statement guarded by the emitted_prefix test are not
// visible -- confirm.
772 pp_emit_prefix (pretty_printer
*pp
)
774 if (pp
->prefix
!= NULL
)
776 switch (pp_prefixing_rule (pp
))
779 case DIAGNOSTICS_SHOW_PREFIX_NEVER
:
782 case DIAGNOSTICS_SHOW_PREFIX_ONCE
:
783 if (pp
->emitted_prefix
)
788 pp_indentation (pp
) += 3;
791 case DIAGNOSTICS_SHOW_PREFIX_EVERY_LINE
:
// The prefix is appended raw and recorded as having been emitted.
793 int prefix_length
= strlen (pp
->prefix
);
794 pp_append_r (pp
, pp
->prefix
, prefix_length
);
795 pp
->emitted_prefix
= true;
802 /* Construct a PRETTY-PRINTER with PREFIX and of MAXIMUM_LENGTH
803 characters per line. */
// P is the prefix and L the line cut-off (see the uses below).  The
// output_buffer is constructed with placement new over storage obtained
// from XCNEW.
// NOTE(review): listing lines 807-814 and 816-817 are absent from this view
// (presumably the remaining member initializers) -- confirm.
805 pretty_printer::pretty_printer (const char *p
, int l
)
806 : buffer (new (XCNEW (output_buffer
)) output_buffer ()),
815 translate_identifiers (true),
818 pp_line_cutoff (this) = l
;
819 /* By default, we emit prefixes once per message. */
820 pp_prefixing_rule (this) = DIAGNOSTICS_SHOW_PREFIX_ONCE
;
821 pp_set_prefix (this, p
);
// Runs the output_buffer destructor explicitly, matching the placement new
// used in the constructor.
// NOTE(review): listing line 827 is absent from this view, so whether the
// buffer's storage is released here is not visible -- confirm.
824 pretty_printer::~pretty_printer ()
826 buffer
->~output_buffer ();
830 /* Append a string delimited by START and END to the output area of
831 PRETTY-PRINTER. No line wrapping is done. However, if beginning a
832 new line then emit PRETTY-PRINTER's prefix and skip any leading
833 whitespace if appropriate. The caller must ensure that it is
836 pp_append_text (pretty_printer
*pp
, const char *start
, const char *end
)
838 /* Emit prefix and skip whitespace if we're starting a new line. */
839 if (pp_buffer (pp
)->line_length
== 0)
842 if (pp_is_wrapping_line (pp
))
843 while (start
!= end
&& *start
== ' ')
846 pp_append_r (pp
, start
, end
- start
);
849 /* Finishes constructing a NULL-terminated character string representing
850 the PRETTY-PRINTED text. */
852 pp_formatted_text (pretty_printer
*pp
)
854 return output_buffer_formatted_text (pp_buffer (pp
));
857 /* Return a pointer to the last character emitted in PRETTY-PRINTER's
858 output area. A NULL pointer means no character available. */
860 pp_last_position_in_text (const pretty_printer
*pp
)
862 return output_buffer_last_position_in_text (pp_buffer (pp
));
865 /* Return the amount of characters PRETTY-PRINTER can accept to
866 make a full line. Meaningful only in line-wrapping mode. */
868 pp_remaining_character_count_for_line (pretty_printer
*pp
)
870 return pp
->maximum_length
- pp_buffer (pp
)->line_length
;
874 /* Format a message into BUFFER a la printf. */
// MSG is the format string; it is formatted via pp_format and then output
// via pp_output_formatted_text.
// NOTE(review): listing lines 878-883 and 887 are absent from this view, so
// the declaration of TEXT and the variadic-argument setup and teardown are
// not visible -- confirm.
876 pp_printf (pretty_printer
*pp
, const char *msg
, ...)
884 text
.format_spec
= msg
;
885 pp_format (pp
, &text
);
886 pp_output_formatted_text (pp
);
891 /* Output MESSAGE verbatim into BUFFER. */
// MSG is the format string; formatting and output are delegated to
// pp_format_verbatim.
// NOTE(review): listing lines 895-900 and 903 onwards are absent from this
// view, so the declaration of TEXT and the variadic-argument setup and
// teardown are not visible -- confirm.
893 pp_verbatim (pretty_printer
*pp
, const char *msg
, ...)
901 text
.format_spec
= msg
;
902 pp_format_verbatim (pp
, &text
);
908 /* Have PRETTY-PRINTER start a new line. */
910 pp_newline (pretty_printer
*pp
)
912 obstack_1grow (pp_buffer (pp
)->obstack
, '\n');
913 pp_needs_newline (pp
) = false;
914 pp_buffer (pp
)->line_length
= 0;
917 /* Have PRETTY-PRINTER add a CHARACTER. */
// Appends C to the active obstack and counts it in the current line length.
// NOTE(review): listing lines 923-927 are absent from this view, so what
// happens when line wrapping is on and no room is left on the line is not
// visible -- confirm.
919 pp_character (pretty_printer
*pp
, int c
)
921 if (pp_is_wrapping_line (pp
)
922 && pp_remaining_character_count_for_line (pp
) <= 0)
928 obstack_1grow (pp_buffer (pp
)->obstack
, c
);
929 ++pp_buffer (pp
)->line_length
;
932 /* Append a STRING to the output area of PRETTY-PRINTER; the STRING may
933 be line-wrapped if in appropriate mode. */
935 pp_string (pretty_printer
*pp
, const char *str
)
937 gcc_checking_assert (str
);
938 pp_maybe_wrap_text (pp
, str
, str
+ strlen (str
));
941 /* Maybe print out a whitespace if needed. */
944 pp_maybe_space (pretty_printer
*pp
)
946 if (pp
->padding
!= pp_none
)
949 pp
->padding
= pp_none
;
953 // Add a newline to the pretty printer PP and flush formatted text.
956 pp_newline_and_flush (pretty_printer
*pp
)
960 pp_needs_newline (pp
) = false;
963 // Add a newline to the pretty printer PP, followed by indentation.
966 pp_newline_and_indent (pretty_printer
*pp
, int n
)
968 pp_indentation (pp
) += n
;
971 pp_needs_newline (pp
) = false;
974 // Add separator C, followed by a single whitespace.
977 pp_separate_with (pretty_printer
*pp
, char c
)
979 pp_character (pp
, c
);
/* The string starting at P has LEN (at least 1) bytes left; if they
   start with a valid UTF-8 sequence, return the length of that
   sequence and set *VALUE to the value of that sequence, and
   otherwise return 0 and set *VALUE to (unsigned int) -1.

   Sequences of 2 to 6 bytes are accepted.  Overlong encodings,
   UTF-16 surrogate values (0xD800-0xDFFF), sequences that run past
   LEN and bad continuation bytes are all rejected.  A LEN of 0 is
   treated as an invalid sequence.  */

static int
decode_utf8_char (const unsigned char *p, size_t len, unsigned int *value)
{
  unsigned int t;

  if (len == 0)
    {
      *value = (unsigned int) -1;
      return 0;
    }

  t = *p;
  if (t & 0x80)
    {
      size_t utf8_len = 0;
      unsigned int ch;
      size_t i;

      /* The number of leading one bits in the first byte gives the
         length of the sequence.  */
      for (t = *p; t & 0x80; t <<= 1)
        utf8_len++;

      /* A length of 1 means a stray continuation byte.  */
      if (utf8_len > len || utf8_len < 2 || utf8_len > 6)
        {
          *value = (unsigned int) -1;
          return 0;
        }

      /* Payload bits of the lead byte.  */
      ch = *p & ((1 << (7 - utf8_len)) - 1);
      for (i = 1; i < utf8_len; i++)
        {
          unsigned int u = p[i];
          /* Continuation bytes must look like 10xxxxxx.  */
          if ((u & 0xC0) != 0x80)
            {
              *value = (unsigned int) -1;
              return 0;
            }
          ch = (ch << 6) | (u & 0x3F);
        }

      /* Reject overlong encodings and surrogates.  */
      if (   (ch <=      0x7F && utf8_len > 1)
          || (ch <=     0x7FF && utf8_len > 2)
          || (ch <=    0xFFFF && utf8_len > 3)
          || (ch <=  0x1FFFFF && utf8_len > 4)
          || (ch <= 0x3FFFFFF && utf8_len > 5)
          || (ch >= 0xD800 && ch <= 0xDFFF))
        {
          *value = (unsigned int) -1;
          return 0;
        }

      *value = ch;
      return utf8_len;
    }
  else
    {
      /* Plain ASCII byte.  */
      *value = t;
      return 1;
    }
}
1040 /* Allocator for identifier_to_locale and corresponding function to
1043 void *(*identifier_to_locale_alloc
) (size_t) = xmalloc
;
1044 void (*identifier_to_locale_free
) (void *) = free
;
1046 /* Given IDENT, an identifier in the internal encoding, return a
1047 version of IDENT suitable for diagnostics in the locale character
1048 set: either IDENT itself, or a string, allocated using
1049 identifier_to_locale_alloc, converted to the locale character set
1050 and using escape sequences if not representable in the locale
1051 character set or containing control characters or invalid byte
1052 sequences. Existing backslashes in IDENT are not doubled, so the
1053 result may not uniquely specify the contents of an arbitrary byte
1054 sequence identifier. */
1057 identifier_to_locale (const char *ident
)
1059 const unsigned char *uid
= (const unsigned char *) ident
;
1060 size_t idlen
= strlen (ident
);
1061 bool valid_printable_utf8
= true;
1062 bool all_ascii
= true;
1065 for (i
= 0; i
< idlen
;)
1068 size_t utf8_len
= decode_utf8_char (&uid
[i
], idlen
- i
, &c
);
1069 if (utf8_len
== 0 || c
<= 0x1F || (c
>= 0x7F && c
<= 0x9F))
1071 valid_printable_utf8
= false;
1079 /* If IDENT contains invalid UTF-8 sequences (which may occur with
1080 attributes putting arbitrary byte sequences in identifiers), or
1081 control characters, we use octal escape sequences for all bytes
1082 outside printable ASCII. */
1083 if (!valid_printable_utf8
)
1085 char *ret
= (char *) identifier_to_locale_alloc (4 * idlen
+ 1);
1087 for (i
= 0; i
< idlen
; i
++)
1089 if (uid
[i
] > 0x1F && uid
[i
] < 0x7F)
1093 sprintf (p
, "\\%03o", uid
[i
]);
1101 /* Otherwise, if it is valid printable ASCII, or printable UTF-8
1102 with the locale character set being UTF-8, IDENT is used. */
1103 if (all_ascii
|| locale_utf8
)
1106 /* Otherwise IDENT is converted to the locale character set if
1108 #if defined ENABLE_NLS && defined HAVE_LANGINFO_CODESET && HAVE_ICONV
1109 if (locale_encoding
!= NULL
)
1111 iconv_t cd
= iconv_open (locale_encoding
, "UTF-8");
1112 bool conversion_ok
= true;
1114 if (cd
!= (iconv_t
) -1)
1116 size_t ret_alloc
= 4 * idlen
+ 1;
1119 /* Repeat the whole conversion process as needed with
1120 larger buffers so non-reversible transformations can
1121 always be detected. */
1122 ICONV_CONST
char *inbuf
= CONST_CAST (char *, ident
);
1124 size_t inbytesleft
= idlen
;
1125 size_t outbytesleft
= ret_alloc
- 1;
1128 ret
= (char *) identifier_to_locale_alloc (ret_alloc
);
1131 if (iconv (cd
, 0, 0, 0, 0) == (size_t) -1)
1133 conversion_ok
= false;
1137 iconv_ret
= iconv (cd
, &inbuf
, &inbytesleft
,
1138 &outbuf
, &outbytesleft
);
1139 if (iconv_ret
== (size_t) -1 || inbytesleft
!= 0)
1144 identifier_to_locale_free (ret
);
1150 conversion_ok
= false;
1154 else if (iconv_ret
!= 0)
1156 conversion_ok
= false;
1159 /* Return to initial shift state. */
1160 if (iconv (cd
, 0, 0, &outbuf
, &outbytesleft
) == (size_t) -1)
1165 identifier_to_locale_free (ret
);
1171 conversion_ok
= false;
1185 /* Otherwise, convert non-ASCII characters in IDENT to UCNs. */
1187 char *ret
= (char *) identifier_to_locale_alloc (10 * idlen
+ 1);
1189 for (i
= 0; i
< idlen
;)
1192 size_t utf8_len
= decode_utf8_char (&uid
[i
], idlen
- i
, &c
);
1197 sprintf (p
, "\\U%08x", c
);