1 /* Various declarations for language-independent pretty-print subroutines.
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Gabriel Dos Reis <gdr@integrable-solutions.net>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
25 #include "pretty-print.h"
26 #include "diagnostic-color.h"
32 /* Overwrite the given location/range within this text_info's rich_location.
33 For use e.g. when implementing "+" in client format decoders. */
36 text_info::set_location (unsigned int idx
, location_t loc
, bool show_caret_p
)
38 gcc_checking_assert (m_richloc
);
39 m_richloc
->set_range (line_table
, idx
, loc
, show_caret_p
);
43 text_info::get_location (unsigned int index_of_location
) const
45 gcc_checking_assert (m_richloc
);
47 if (index_of_location
== 0)
48 return m_richloc
->get_loc ();
50 return UNKNOWN_LOCATION
;
53 // Default construct an output buffer.
55 output_buffer::output_buffer ()
56 : formatted_obstack (),
58 obstack (&formatted_obstack
),
65 obstack_init (&formatted_obstack
);
66 obstack_init (&chunk_obstack
);
69 // Release resources owned by an output buffer at the end of lifetime.
71 output_buffer::~output_buffer ()
73 obstack_free (&chunk_obstack
, NULL
);
74 obstack_free (&formatted_obstack
, NULL
);
/* Format an integer given by va_arg (ARG, type-specifier T) where
   type-specifier is a precision modifier as indicated by PREC.  F is
   a string used to construct the appropriate format-specifier.

   PREC 0 reads a plain T, 1 a "long T" and 2 a "long long T"; any
   other value formats nothing.  The do/while (0) wrapper makes the
   expansion usable as a single statement.  */
#define pp_integer_with_precision(PP, ARG, PREC, T, F)       \
  do                                                         \
    switch (PREC)                                            \
      {                                                      \
      case 0:                                                \
        pp_scalar (PP, "%" F, va_arg (ARG, T));              \
        break;                                               \
                                                             \
      case 1:                                                \
        pp_scalar (PP, "%l" F, va_arg (ARG, long T));        \
        break;                                               \
                                                             \
      case 2:                                                \
        pp_scalar (PP, "%" HOST_LONG_LONG_FORMAT F, va_arg (ARG, long long T));  \
        break;                                               \
                                                             \
      default:                                               \
        break;                                               \
      }                                                      \
  while (0)
103 /* Subroutine of pp_set_maximum_length. Set up PRETTY-PRINTER's
104 internal maximum characters per line. */
106 pp_set_real_maximum_length (pretty_printer
*pp
)
108 /* If we're told not to wrap lines then do the obvious thing. In case
109 we'll emit prefix only once per message, it is appropriate
110 not to increase unnecessarily the line-length cut-off. */
111 if (!pp_is_wrapping_line (pp
)
112 || pp_prefixing_rule (pp
) == DIAGNOSTICS_SHOW_PREFIX_ONCE
113 || pp_prefixing_rule (pp
) == DIAGNOSTICS_SHOW_PREFIX_NEVER
)
114 pp
->maximum_length
= pp_line_cutoff (pp
);
117 int prefix_length
= pp
->prefix
? strlen (pp
->prefix
) : 0;
118 /* If the prefix is ridiculously too long, output at least
120 if (pp_line_cutoff (pp
) - prefix_length
< 32)
121 pp
->maximum_length
= pp_line_cutoff (pp
) + 32;
123 pp
->maximum_length
= pp_line_cutoff (pp
);
127 /* Clear PRETTY-PRINTER's output state. */
129 pp_clear_state (pretty_printer
*pp
)
131 pp
->emitted_prefix
= false;
132 pp_indentation (pp
) = 0;
135 /* Flush the formatted text of PRETTY-PRINTER onto the attached stream. */
137 pp_write_text_to_stream (pretty_printer
*pp
)
139 const char *text
= pp_formatted_text (pp
);
140 fputs (text
, pp_buffer (pp
)->stream
);
141 pp_clear_output_area (pp
);
/* As pp_write_text_to_stream, but for GraphViz label output.

   Flush the formatted text of pretty-printer PP onto the attached stream.
   Replace characters in PP's formatted text that have special meaning in
   a GraphViz .dot file.

   This routine is not very fast, but it doesn't have to be as it is only
   used by routines dumping intermediate representations in graph form.  */
154 pp_write_text_as_dot_label_to_stream (pretty_printer
*pp
, bool for_record
)
156 const char *text
= pp_formatted_text (pp
);
157 const char *p
= text
;
158 FILE *fp
= pp_buffer (pp
)->stream
;
164 /* Print newlines as a left-aligned newline. */
166 fputs ("\\l\\\n", fp
);
169 /* A pipe is only special for record-shape nodes. */
176 /* The following characters always have to be escaped
177 for use in labels. */
193 pp_clear_output_area (pp
);
196 /* Wrap a text delimited by START and END into PRETTY-PRINTER. */
198 pp_wrap_text (pretty_printer
*pp
, const char *start
, const char *end
)
200 bool wrapping_line
= pp_is_wrapping_line (pp
);
204 /* Dump anything bordered by whitespaces. */
206 const char *p
= start
;
207 while (p
!= end
&& !ISBLANK (*p
) && *p
!= '\n')
210 && p
- start
>= pp_remaining_character_count_for_line (pp
))
212 pp_append_text (pp
, start
, p
);
216 if (start
!= end
&& ISBLANK (*start
))
221 if (start
!= end
&& *start
== '\n')
229 /* Same as pp_wrap_text but wrap text only when in line-wrapping mode. */
231 pp_maybe_wrap_text (pretty_printer
*pp
, const char *start
, const char *end
)
233 if (pp_is_wrapping_line (pp
))
234 pp_wrap_text (pp
, start
, end
);
236 pp_append_text (pp
, start
, end
);
239 /* Append to the output area of PRETTY-PRINTER a string specified by its
240 STARTing character and LENGTH. */
242 pp_append_r (pretty_printer
*pp
, const char *start
, int length
)
244 output_buffer_append_r (pp_buffer (pp
), start
, length
);
247 /* Insert enough spaces into the output area of PRETTY-PRINTER to bring
248 the column position to the current indentation level, assuming that a
249 newline has just been written to the buffer. */
251 pp_indent (pretty_printer
*pp
)
253 int n
= pp_indentation (pp
);
256 for (i
= 0; i
< n
; ++i
)
260 /* The following format specifiers are recognized as being client independent:
261 %d, %i: (signed) integer in base ten.
262 %u: unsigned integer in base ten.
263 %o: unsigned integer in base eight.
264 %x: unsigned integer in base sixteen.
265 %ld, %li, %lo, %lu, %lx: long versions of the above.
266 %lld, %lli, %llo, %llu, %llx: long long versions.
267 %wd, %wi, %wo, %wu, %wx: HOST_WIDE_INT versions.
271 %r: if pp_show_color(pp), switch to color identified by const char *.
272 %R: if pp_show_color(pp), reset color.
273 %m: strerror(text->err_no) - does not consume a value from args_ptr.
277 %': apostrophe (should only be used in untranslated messages;
278 translations should use appropriate punctuation directly).
279 %.*s: a substring the length of which is specified by an argument
281 %Ns: likewise, but length specified as constant in the format string.
282 Flag 'q': quote formatted text (must come immediately after '%').
284 Arguments can be used sequentially, or through %N$ resp. *N$
285 notation Nth argument after the format string. If %N$ / *N$
286 notation is used, it must be used for all arguments, except %m, %%,
287 %<, %> and %', which may not have a number, as they do not consume
288 an argument. When %M$.*N$s is used, M must be N + 1. (This may
289 also be written %M$.*s, provided N is not otherwise used.) The
290 format string must have conversion specifiers with argument numbers
291 1 up to highest argument; each argument may only be used once.
292 A format string can have at most 30 arguments. */
/* Formatting phases 1 and 2: render TEXT->format_spec plus
   TEXT->args_ptr into a series of chunks in
   pp_buffer (PP)->cur_chunk_array->args[].
   Phase 3 is in pp_output_formatted_text.  */
299 pp_format (pretty_printer
*pp
, text_info
*text
)
301 output_buffer
*buffer
= pp_buffer (pp
);
304 struct chunk_info
*new_chunk_array
;
306 unsigned int curarg
= 0, chunk
= 0, argno
;
307 pp_wrapping_mode_t old_wrapping_mode
;
308 bool any_unnumbered
= false, any_numbered
= false;
309 const char **formatters
[PP_NL_ARGMAX
];
311 /* Allocate a new chunk structure. */
312 new_chunk_array
= XOBNEW (&buffer
->chunk_obstack
, struct chunk_info
);
313 new_chunk_array
->prev
= buffer
->cur_chunk_array
;
314 buffer
->cur_chunk_array
= new_chunk_array
;
315 args
= new_chunk_array
->args
;
317 /* Formatting phase 1: split up TEXT->format_spec into chunks in
318 pp_buffer (PP)->args[]. Even-numbered chunks are to be output
319 verbatim, odd-numbered chunks are format specifiers.
320 %m, %%, %<, %>, and %' are replaced with the appropriate text at
323 memset (formatters
, 0, sizeof formatters
);
325 for (p
= text
->format_spec
; *p
; )
327 while (*p
!= '\0' && *p
!= '%')
329 obstack_1grow (&buffer
->chunk_obstack
, *p
);
342 obstack_1grow (&buffer
->chunk_obstack
, '%');
348 obstack_grow (&buffer
->chunk_obstack
,
349 open_quote
, strlen (open_quote
));
351 = colorize_start (pp_show_color (pp
), "quote");
352 obstack_grow (&buffer
->chunk_obstack
, colorstr
, strlen (colorstr
));
359 const char *colorstr
= colorize_stop (pp_show_color (pp
));
360 obstack_grow (&buffer
->chunk_obstack
, colorstr
, strlen (colorstr
));
364 obstack_grow (&buffer
->chunk_obstack
,
365 close_quote
, strlen (close_quote
));
371 const char *colorstr
= colorize_stop (pp_show_color (pp
));
372 obstack_grow (&buffer
->chunk_obstack
, colorstr
,
380 const char *errstr
= xstrerror (text
->err_no
);
381 obstack_grow (&buffer
->chunk_obstack
, errstr
, strlen (errstr
));
387 /* Handled in phase 2. Terminate the plain chunk here. */
388 obstack_1grow (&buffer
->chunk_obstack
, '\0');
389 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
390 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
397 argno
= strtoul (p
, &end
, 10) - 1;
399 gcc_assert (*p
== '$');
403 gcc_assert (!any_unnumbered
);
408 any_unnumbered
= true;
409 gcc_assert (!any_numbered
);
411 gcc_assert (argno
< PP_NL_ARGMAX
);
412 gcc_assert (!formatters
[argno
]);
413 formatters
[argno
] = &args
[chunk
];
416 obstack_1grow (&buffer
->chunk_obstack
, *p
);
419 while (strchr ("qwl+#", p
[-1]));
423 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
424 (where M == N + 1). */
429 obstack_1grow (&buffer
->chunk_obstack
, *p
);
432 while (ISDIGIT (p
[-1]));
433 gcc_assert (p
[-1] == 's');
437 gcc_assert (*p
== '*');
438 obstack_1grow (&buffer
->chunk_obstack
, '*');
444 unsigned int argno2
= strtoul (p
, &end
, 10) - 1;
446 gcc_assert (argno2
== argno
- 1);
447 gcc_assert (!any_unnumbered
);
448 gcc_assert (*p
== '$');
451 formatters
[argno2
] = formatters
[argno
];
455 gcc_assert (!any_numbered
);
456 formatters
[argno
+1] = formatters
[argno
];
459 gcc_assert (*p
== 's');
460 obstack_1grow (&buffer
->chunk_obstack
, 's');
467 obstack_1grow (&buffer
->chunk_obstack
, '\0');
468 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
469 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
472 obstack_1grow (&buffer
->chunk_obstack
, '\0');
473 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
474 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
477 /* Set output to the argument obstack, and switch line-wrapping and
479 buffer
->obstack
= &buffer
->chunk_obstack
;
480 old_wrapping_mode
= pp_set_verbatim_wrapping (pp
);
482 /* Second phase. Replace each formatter with the formatted text it
485 for (argno
= 0; formatters
[argno
]; argno
++)
493 /* We do not attempt to enforce any ordering on the modifier
496 for (p
= *formatters
[argno
];; p
++)
521 /* We don't support precision beyond that of "long long". */
522 gcc_assert (precision
< 2);
529 gcc_assert (!wide
|| precision
== 0);
533 pp_string (pp
, open_quote
);
534 pp_string (pp
, colorize_start (pp_show_color (pp
), "quote"));
540 pp_string (pp
, colorize_start (pp_show_color (pp
),
541 va_arg (*text
->args_ptr
,
546 pp_character (pp
, va_arg (*text
->args_ptr
, int));
552 pp_wide_integer (pp
, va_arg (*text
->args_ptr
, HOST_WIDE_INT
));
554 pp_integer_with_precision
555 (pp
, *text
->args_ptr
, precision
, int, "d");
560 pp_scalar (pp
, "%" HOST_WIDE_INT_PRINT
"o",
561 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
563 pp_integer_with_precision
564 (pp
, *text
->args_ptr
, precision
, unsigned, "o");
568 pp_string (pp
, va_arg (*text
->args_ptr
, const char *));
572 pp_pointer (pp
, va_arg (*text
->args_ptr
, void *));
577 pp_scalar (pp
, HOST_WIDE_INT_PRINT_UNSIGNED
,
578 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
580 pp_integer_with_precision
581 (pp
, *text
->args_ptr
, precision
, unsigned, "u");
586 pp_scalar (pp
, HOST_WIDE_INT_PRINT_HEX
,
587 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
589 pp_integer_with_precision
590 (pp
, *text
->args_ptr
, precision
, unsigned, "x");
598 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
599 (where M == N + 1). The format string should be verified
600 already from the first phase. */
605 n
= strtoul (p
, &end
, 10);
607 gcc_assert (*p
== 's');
611 gcc_assert (*p
== '*');
613 gcc_assert (*p
== 's');
614 n
= va_arg (*text
->args_ptr
, int);
616 /* This consumes a second entry in the formatters array. */
617 gcc_assert (formatters
[argno
] == formatters
[argno
+1]);
621 s
= va_arg (*text
->args_ptr
, const char *);
622 pp_append_text (pp
, s
, s
+ n
);
630 gcc_assert (pp_format_decoder (pp
));
631 ok
= pp_format_decoder (pp
) (pp
, text
, p
,
632 precision
, wide
, plus
, hash
);
639 pp_string (pp
, colorize_stop (pp_show_color (pp
)));
640 pp_string (pp
, close_quote
);
643 obstack_1grow (&buffer
->chunk_obstack
, '\0');
644 *formatters
[argno
] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
648 for (; argno
< PP_NL_ARGMAX
; argno
++)
649 gcc_assert (!formatters
[argno
]);
651 /* Revert to normal obstack and wrapping mode. */
652 buffer
->obstack
= &buffer
->formatted_obstack
;
653 buffer
->line_length
= 0;
654 pp_wrapping_mode (pp
) = old_wrapping_mode
;
658 /* Format of a message pointed to by TEXT. */
660 pp_output_formatted_text (pretty_printer
*pp
)
663 output_buffer
*buffer
= pp_buffer (pp
);
664 struct chunk_info
*chunk_array
= buffer
->cur_chunk_array
;
665 const char **args
= chunk_array
->args
;
667 gcc_assert (buffer
->obstack
== &buffer
->formatted_obstack
);
668 gcc_assert (buffer
->line_length
== 0);
670 /* This is a third phase, first 2 phases done in pp_format_args.
671 Now we actually print it. */
672 for (chunk
= 0; args
[chunk
]; chunk
++)
673 pp_string (pp
, args
[chunk
]);
675 /* Deallocate the chunk structure and everything after it (i.e. the
676 associated series of formatted strings). */
677 buffer
->cur_chunk_array
= chunk_array
->prev
;
678 obstack_free (&buffer
->chunk_obstack
, chunk_array
);
681 /* Helper subroutine of output_verbatim and verbatim. Do the appropriate
682 settings needed by BUFFER for a verbatim formatting. */
684 pp_format_verbatim (pretty_printer
*pp
, text_info
*text
)
686 /* Set verbatim mode. */
687 pp_wrapping_mode_t oldmode
= pp_set_verbatim_wrapping (pp
);
689 /* Do the actual formatting. */
690 pp_format (pp
, text
);
691 pp_output_formatted_text (pp
);
693 /* Restore previous settings. */
694 pp_wrapping_mode (pp
) = oldmode
;
697 /* Flush the content of BUFFER onto the attached stream. This
698 function does nothing unless pp->output_buffer->flush_p. */
700 pp_flush (pretty_printer
*pp
)
703 if (!pp
->buffer
->flush_p
)
705 pp_write_text_to_stream (pp
);
706 fflush (pp_buffer (pp
)->stream
);
709 /* Flush the content of BUFFER onto the attached stream independently
710 of the value of pp->output_buffer->flush_p. */
712 pp_really_flush (pretty_printer
*pp
)
715 pp_write_text_to_stream (pp
);
716 fflush (pp_buffer (pp
)->stream
);
719 /* Sets the number of maximum characters per line PRETTY-PRINTER can
720 output in line-wrapping mode. A LENGTH value 0 suppresses
723 pp_set_line_maximum_length (pretty_printer
*pp
, int length
)
725 pp_line_cutoff (pp
) = length
;
726 pp_set_real_maximum_length (pp
);
729 /* Clear PRETTY-PRINTER output area text info. */
731 pp_clear_output_area (pretty_printer
*pp
)
733 obstack_free (pp_buffer (pp
)->obstack
,
734 obstack_base (pp_buffer (pp
)->obstack
));
735 pp_buffer (pp
)->line_length
= 0;
738 /* Set PREFIX for PRETTY-PRINTER. */
740 pp_set_prefix (pretty_printer
*pp
, const char *prefix
)
743 pp_set_real_maximum_length (pp
);
744 pp
->emitted_prefix
= false;
745 pp_indentation (pp
) = 0;
748 /* Free PRETTY-PRINTER's prefix, a previously malloc()'d string. */
750 pp_destroy_prefix (pretty_printer
*pp
)
752 if (pp
->prefix
!= NULL
)
754 free (CONST_CAST (char *, pp
->prefix
));
759 /* Write out PRETTY-PRINTER's prefix. */
761 pp_emit_prefix (pretty_printer
*pp
)
763 if (pp
->prefix
!= NULL
)
765 switch (pp_prefixing_rule (pp
))
768 case DIAGNOSTICS_SHOW_PREFIX_NEVER
:
771 case DIAGNOSTICS_SHOW_PREFIX_ONCE
:
772 if (pp
->emitted_prefix
)
777 pp_indentation (pp
) += 3;
780 case DIAGNOSTICS_SHOW_PREFIX_EVERY_LINE
:
782 int prefix_length
= strlen (pp
->prefix
);
783 pp_append_r (pp
, pp
->prefix
, prefix_length
);
784 pp
->emitted_prefix
= true;
791 /* Construct a PRETTY-PRINTER with PREFIX and of MAXIMUM_LENGTH
792 characters per line. */
794 pretty_printer::pretty_printer (const char *p
, int l
)
795 : buffer (new (XCNEW (output_buffer
)) output_buffer ()),
804 translate_identifiers (true),
807 pp_line_cutoff (this) = l
;
808 /* By default, we emit prefixes once per message. */
809 pp_prefixing_rule (this) = DIAGNOSTICS_SHOW_PREFIX_ONCE
;
810 pp_set_prefix (this, p
);
813 pretty_printer::~pretty_printer ()
815 buffer
->~output_buffer ();
819 /* Append a string delimited by START and END to the output area of
820 PRETTY-PRINTER. No line wrapping is done. However, if beginning a
821 new line then emit PRETTY-PRINTER's prefix and skip any leading
822 whitespace if appropriate. The caller must ensure that it is
825 pp_append_text (pretty_printer
*pp
, const char *start
, const char *end
)
827 /* Emit prefix and skip whitespace if we're starting a new line. */
828 if (pp_buffer (pp
)->line_length
== 0)
831 if (pp_is_wrapping_line (pp
))
832 while (start
!= end
&& *start
== ' ')
835 pp_append_r (pp
, start
, end
- start
);
838 /* Finishes constructing a NULL-terminated character string representing
839 the PRETTY-PRINTED text. */
841 pp_formatted_text (pretty_printer
*pp
)
843 return output_buffer_formatted_text (pp_buffer (pp
));
846 /* Return a pointer to the last character emitted in PRETTY-PRINTER's
847 output area. A NULL pointer means no character available. */
849 pp_last_position_in_text (const pretty_printer
*pp
)
851 return output_buffer_last_position_in_text (pp_buffer (pp
));
854 /* Return the amount of characters PRETTY-PRINTER can accept to
855 make a full line. Meaningful only in line-wrapping mode. */
857 pp_remaining_character_count_for_line (pretty_printer
*pp
)
859 return pp
->maximum_length
- pp_buffer (pp
)->line_length
;
863 /* Format a message into BUFFER a la printf. */
865 pp_printf (pretty_printer
*pp
, const char *msg
, ...)
873 text
.format_spec
= msg
;
874 pp_format (pp
, &text
);
875 pp_output_formatted_text (pp
);
880 /* Output MESSAGE verbatim into BUFFER. */
882 pp_verbatim (pretty_printer
*pp
, const char *msg
, ...)
890 text
.format_spec
= msg
;
891 pp_format_verbatim (pp
, &text
);
897 /* Have PRETTY-PRINTER start a new line. */
899 pp_newline (pretty_printer
*pp
)
901 obstack_1grow (pp_buffer (pp
)->obstack
, '\n');
902 pp_needs_newline (pp
) = false;
903 pp_buffer (pp
)->line_length
= 0;
906 /* Have PRETTY-PRINTER add a CHARACTER. */
908 pp_character (pretty_printer
*pp
, int c
)
910 if (pp_is_wrapping_line (pp
)
911 && pp_remaining_character_count_for_line (pp
) <= 0)
917 obstack_1grow (pp_buffer (pp
)->obstack
, c
);
918 ++pp_buffer (pp
)->line_length
;
921 /* Append a STRING to the output area of PRETTY-PRINTER; the STRING may
922 be line-wrapped if in appropriate mode. */
924 pp_string (pretty_printer
*pp
, const char *str
)
926 gcc_checking_assert (str
);
927 pp_maybe_wrap_text (pp
, str
, str
+ strlen (str
));
930 /* Maybe print out a whitespace if needed. */
933 pp_maybe_space (pretty_printer
*pp
)
935 if (pp
->padding
!= pp_none
)
938 pp
->padding
= pp_none
;
942 // Add a newline to the pretty printer PP and flush formatted text.
945 pp_newline_and_flush (pretty_printer
*pp
)
949 pp_needs_newline (pp
) = false;
952 // Add a newline to the pretty printer PP, followed by indentation.
955 pp_newline_and_indent (pretty_printer
*pp
, int n
)
957 pp_indentation (pp
) += n
;
960 pp_needs_newline (pp
) = false;
963 // Add separator C, followed by a single whitespace.
966 pp_separate_with (pretty_printer
*pp
, char c
)
968 pp_character (pp
, c
);
/* The string starting at P has LEN (at least 1) bytes left; if they
   start with a valid UTF-8 sequence, return the length of that
   sequence and set *VALUE to the value of that sequence, and
   otherwise return 0 and set *VALUE to (unsigned int) -1.

   A sequence is rejected when it is truncated, starts with a
   continuation byte, has a malformed continuation byte, is longer than
   necessary for its value (overlong), or encodes a surrogate
   (0xD800-0xDFFF).  */

static int
decode_utf8_char (const unsigned char *p, size_t len, unsigned int *value)
{
  unsigned int t = *p;

  if (len == 0)
    abort ();
  if ((t & 0x80) != 0)
    {
      size_t utf8_len = 0;
      unsigned int ch;
      size_t i;
      /* The number of leading 1 bits of the first byte gives the
         length of the sequence.  */
      for (t = *p; t & 0x80; t <<= 1)
        utf8_len++;

      if (utf8_len > len || utf8_len < 2 || utf8_len > 6)
        {
          *value = (unsigned int) -1;
          return 0;
        }
      /* Payload bits of the lead byte.  */
      ch = *p & ((1 << (7 - utf8_len)) - 1);
      for (i = 1; i < utf8_len; i++)
        {
          unsigned int u = p[i];
          /* Continuation bytes must look like 10xxxxxx.  */
          if ((u & 0xC0) != 0x80)
            {
              *value = (unsigned int) -1;
              return 0;
            }
          ch = (ch << 6) | (u & 0x3F);
        }
      /* Reject overlong encodings and surrogates.  */
      if (   (ch <=      0x7F && utf8_len > 1)
          || (ch <=     0x7FF && utf8_len > 2)
          || (ch <=    0xFFFF && utf8_len > 3)
          || (ch <=  0x1FFFFF && utf8_len > 4)
          || (ch <= 0x3FFFFFF && utf8_len > 5)
          || (ch >= 0xD800 && ch <= 0xDFFF))
        {
          *value = (unsigned int) -1;
          return 0;
        }
      *value = ch;
      return utf8_len;
    }
  else
    {
      /* Plain ASCII byte.  */
      *value = t;
      return 1;
    }
}
1029 /* Allocator for identifier_to_locale and corresponding function to
1032 void *(*identifier_to_locale_alloc
) (size_t) = xmalloc
;
1033 void (*identifier_to_locale_free
) (void *) = free
;
1035 /* Given IDENT, an identifier in the internal encoding, return a
1036 version of IDENT suitable for diagnostics in the locale character
1037 set: either IDENT itself, or a string, allocated using
1038 identifier_to_locale_alloc, converted to the locale character set
1039 and using escape sequences if not representable in the locale
1040 character set or containing control characters or invalid byte
1041 sequences. Existing backslashes in IDENT are not doubled, so the
1042 result may not uniquely specify the contents of an arbitrary byte
1043 sequence identifier. */
1046 identifier_to_locale (const char *ident
)
1048 const unsigned char *uid
= (const unsigned char *) ident
;
1049 size_t idlen
= strlen (ident
);
1050 bool valid_printable_utf8
= true;
1051 bool all_ascii
= true;
1054 for (i
= 0; i
< idlen
;)
1057 size_t utf8_len
= decode_utf8_char (&uid
[i
], idlen
- i
, &c
);
1058 if (utf8_len
== 0 || c
<= 0x1F || (c
>= 0x7F && c
<= 0x9F))
1060 valid_printable_utf8
= false;
1068 /* If IDENT contains invalid UTF-8 sequences (which may occur with
1069 attributes putting arbitrary byte sequences in identifiers), or
1070 control characters, we use octal escape sequences for all bytes
1071 outside printable ASCII. */
1072 if (!valid_printable_utf8
)
1074 char *ret
= (char *) identifier_to_locale_alloc (4 * idlen
+ 1);
1076 for (i
= 0; i
< idlen
; i
++)
1078 if (uid
[i
] > 0x1F && uid
[i
] < 0x7F)
1082 sprintf (p
, "\\%03o", uid
[i
]);
1090 /* Otherwise, if it is valid printable ASCII, or printable UTF-8
1091 with the locale character set being UTF-8, IDENT is used. */
1092 if (all_ascii
|| locale_utf8
)
1095 /* Otherwise IDENT is converted to the locale character set if
1097 #if defined ENABLE_NLS && defined HAVE_LANGINFO_CODESET && HAVE_ICONV
1098 if (locale_encoding
!= NULL
)
1100 iconv_t cd
= iconv_open (locale_encoding
, "UTF-8");
1101 bool conversion_ok
= true;
1103 if (cd
!= (iconv_t
) -1)
1105 size_t ret_alloc
= 4 * idlen
+ 1;
1108 /* Repeat the whole conversion process as needed with
1109 larger buffers so non-reversible transformations can
1110 always be detected. */
1111 ICONV_CONST
char *inbuf
= CONST_CAST (char *, ident
);
1113 size_t inbytesleft
= idlen
;
1114 size_t outbytesleft
= ret_alloc
- 1;
1117 ret
= (char *) identifier_to_locale_alloc (ret_alloc
);
1120 if (iconv (cd
, 0, 0, 0, 0) == (size_t) -1)
1122 conversion_ok
= false;
1126 iconv_ret
= iconv (cd
, &inbuf
, &inbytesleft
,
1127 &outbuf
, &outbytesleft
);
1128 if (iconv_ret
== (size_t) -1 || inbytesleft
!= 0)
1133 identifier_to_locale_free (ret
);
1139 conversion_ok
= false;
1143 else if (iconv_ret
!= 0)
1145 conversion_ok
= false;
1148 /* Return to initial shift state. */
1149 if (iconv (cd
, 0, 0, &outbuf
, &outbytesleft
) == (size_t) -1)
1154 identifier_to_locale_free (ret
);
1160 conversion_ok
= false;
1174 /* Otherwise, convert non-ASCII characters in IDENT to UCNs. */
1176 char *ret
= (char *) identifier_to_locale_alloc (10 * idlen
+ 1);
1178 for (i
= 0; i
< idlen
;)
1181 size_t utf8_len
= decode_utf8_char (&uid
[i
], idlen
- i
, &c
);
1186 sprintf (p
, "\\U%08x", c
);