1 /* Various declarations for language-independent pretty-print subroutines.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Gabriel Dos Reis <gdr@integrable-solutions.net>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
25 #include "pretty-print.h"
26 #include "diagnostic-color.h"
28 #include <new> // For placement-new.
34 // Default construct an output buffer.
36 output_buffer::output_buffer ()
37 : formatted_obstack (),
39 obstack (&formatted_obstack
),
45 obstack_init (&formatted_obstack
);
46 obstack_init (&chunk_obstack
);
49 // Release resources owned by an output buffer at the end of lifetime.
51 output_buffer::~output_buffer ()
53 obstack_free (&chunk_obstack
, obstack_finish (&chunk_obstack
));
54 obstack_free (&formatted_obstack
, obstack_finish (&formatted_obstack
));
57 /* A pointer to the formatted diagnostic message. */
58 #define pp_formatted_text_data(PP) \
59 ((const char *) obstack_base (pp_buffer (PP)->obstack))
61 /* Format an integer given by va_arg (ARG, type-specifier T) where
62 type-specifier is a precision modifier as indicated by PREC. F is
63 a string used to construct the appropriate format-specifier. */
64 #define pp_integer_with_precision(PP, ARG, PREC, T, F) \
69 pp_scalar (PP, "%" F, va_arg (ARG, T)); \
73 pp_scalar (PP, "%l" F, va_arg (ARG, long T)); \
77 pp_scalar (PP, "%" HOST_LONG_LONG_FORMAT F, va_arg (ARG, long long T)); \
86 /* Subroutine of pp_set_maximum_length. Set up PRETTY-PRINTER's
87 internal maximum characters per line. */
89 pp_set_real_maximum_length (pretty_printer
*pp
)
91 /* If we're told not to wrap lines then do the obvious thing. In case
92 we'll emit prefix only once per message, it is appropriate
93 not to increase unnecessarily the line-length cut-off. */
94 if (!pp_is_wrapping_line (pp
)
95 || pp_prefixing_rule (pp
) == DIAGNOSTICS_SHOW_PREFIX_ONCE
96 || pp_prefixing_rule (pp
) == DIAGNOSTICS_SHOW_PREFIX_NEVER
)
97 pp
->maximum_length
= pp_line_cutoff (pp
);
100 int prefix_length
= pp
->prefix
? strlen (pp
->prefix
) : 0;
101 /* If the prefix is ridiculously too long, output at least
103 if (pp_line_cutoff (pp
) - prefix_length
< 32)
104 pp
->maximum_length
= pp_line_cutoff (pp
) + 32;
106 pp
->maximum_length
= pp_line_cutoff (pp
);
110 /* Clear PRETTY-PRINTER's output state. */
112 pp_clear_state (pretty_printer
*pp
)
114 pp
->emitted_prefix
= false;
115 pp_indentation (pp
) = 0;
118 /* Flush the formatted text of PRETTY-PRINTER onto the attached stream. */
120 pp_write_text_to_stream (pretty_printer
*pp
)
122 const char *text
= pp_formatted_text (pp
);
123 fputs (text
, pp_buffer (pp
)->stream
);
124 pp_clear_output_area (pp
);
127 /* As pp_write_text_to_stream, but for GraphViz label output.
129 Flush the formatted text of pretty-printer PP onto the attached stream.
130 Replace characters in that text that have special meaning in a GraphViz .dot file.
133 This routine is not very fast, but it doesn't have to be, as it is only
134 used by routines dumping intermediate representations in graph form. */
137 pp_write_text_as_dot_label_to_stream (pretty_printer
*pp
, bool for_record
)
139 const char *text
= pp_formatted_text (pp
);
140 const char *p
= text
;
141 FILE *fp
= pp_buffer (pp
)->stream
;
147 /* Print newlines as a left-aligned newline. */
149 fputs ("\\l\\\n", fp
);
152 /* A pipe is only special for record-shape nodes. */
159 /* The following characters always have to be escaped
160 for use in labels. */
176 pp_clear_output_area (pp
);
179 /* Wrap a text delimited by START and END into PRETTY-PRINTER. */
181 pp_wrap_text (pretty_printer
*pp
, const char *start
, const char *end
)
183 bool wrapping_line
= pp_is_wrapping_line (pp
);
187 /* Dump anything bordered by whitespaces. */
189 const char *p
= start
;
190 while (p
!= end
&& !ISBLANK (*p
) && *p
!= '\n')
193 && p
- start
>= pp_remaining_character_count_for_line (pp
))
195 pp_append_text (pp
, start
, p
);
199 if (start
!= end
&& ISBLANK (*start
))
204 if (start
!= end
&& *start
== '\n')
212 /* Same as pp_wrap_text but wrap text only when in line-wrapping mode. */
214 pp_maybe_wrap_text (pretty_printer
*pp
, const char *start
, const char *end
)
216 if (pp_is_wrapping_line (pp
))
217 pp_wrap_text (pp
, start
, end
);
219 pp_append_text (pp
, start
, end
);
222 /* Append to the output area of PRETTY-PRINTER a string specified by its
223 STARTing character and LENGTH. */
225 pp_append_r (pretty_printer
*pp
, const char *start
, int length
)
227 obstack_grow (pp_buffer (pp
)->obstack
, start
, length
);
228 pp_buffer (pp
)->line_length
+= length
;
231 /* Insert enough spaces into the output area of PRETTY-PRINTER to bring
232 the column position to the current indentation level, assuming that a
233 newline has just been written to the buffer. */
235 pp_indent (pretty_printer
*pp
)
237 int n
= pp_indentation (pp
);
240 for (i
= 0; i
< n
; ++i
)
244 /* The following format specifiers are recognized as being client independent:
245 %d, %i: (signed) integer in base ten.
246 %u: unsigned integer in base ten.
247 %o: unsigned integer in base eight.
248 %x: unsigned integer in base sixteen.
249 %ld, %li, %lo, %lu, %lx: long versions of the above.
250 %lld, %lli, %llo, %llu, %llx: long long versions.
251 %wd, %wi, %wo, %wu, %wx: HOST_WIDE_INT versions.
255 %r: if pp_show_color(pp), switch to color identified by const char *.
256 %R: if pp_show_color(pp), reset color.
257 %m: strerror(text->err_no) - does not consume a value from args_ptr.
261 %': apostrophe (should only be used in untranslated messages;
262 translations should use appropriate punctuation directly).
263 %.*s: a substring the length of which is specified by an integer argument.
265 %Ns: likewise, but length specified as constant in the format string.
266 Flag 'q': quote formatted text (must come immediately after '%').
268 Arguments can be used sequentially, or through %N$ resp. *N$
269 notation Nth argument after the format string. If %N$ / *N$
270 notation is used, it must be used for all arguments, except %m, %%,
271 %<, %> and %', which may not have a number, as they do not consume
272 an argument. When %M$.*N$s is used, M must be N + 1. (This may
273 also be written %M$.*s, provided N is not otherwise used.) The
274 format string must have conversion specifiers with argument numbers
275 1 up to highest argument; each argument may only be used once.
276 A format string can have at most 30 arguments. */
278 /* Formatting phases 1 and 2: render TEXT->format_spec plus
279 TEXT->args_ptr into a series of chunks in pp_buffer (PP)->args[].
280 Phase 3 is in pp_output_formatted_text. */
283 pp_format (pretty_printer
*pp
, text_info
*text
)
285 output_buffer
*buffer
= pp_buffer (pp
);
288 struct chunk_info
*new_chunk_array
;
290 unsigned int curarg
= 0, chunk
= 0, argno
;
291 pp_wrapping_mode_t old_wrapping_mode
;
292 bool any_unnumbered
= false, any_numbered
= false;
293 const char **formatters
[PP_NL_ARGMAX
];
295 /* Allocate a new chunk structure. */
296 new_chunk_array
= XOBNEW (&buffer
->chunk_obstack
, struct chunk_info
);
297 new_chunk_array
->prev
= buffer
->cur_chunk_array
;
298 buffer
->cur_chunk_array
= new_chunk_array
;
299 args
= new_chunk_array
->args
;
301 /* Formatting phase 1: split up TEXT->format_spec into chunks in
302 pp_buffer (PP)->args[]. Even-numbered chunks are to be output
303 verbatim, odd-numbered chunks are format specifiers.
304 %m, %%, %<, %>, and %' are replaced with the appropriate text at
307 memset (formatters
, 0, sizeof formatters
);
309 for (p
= text
->format_spec
; *p
; )
311 while (*p
!= '\0' && *p
!= '%')
313 obstack_1grow (&buffer
->chunk_obstack
, *p
);
326 obstack_1grow (&buffer
->chunk_obstack
, '%');
332 obstack_grow (&buffer
->chunk_obstack
,
333 open_quote
, strlen (open_quote
));
335 = colorize_start (pp_show_color (pp
), "quote");
336 obstack_grow (&buffer
->chunk_obstack
, colorstr
, strlen (colorstr
));
343 const char *colorstr
= colorize_stop (pp_show_color (pp
));
344 obstack_grow (&buffer
->chunk_obstack
, colorstr
, strlen (colorstr
));
348 obstack_grow (&buffer
->chunk_obstack
,
349 close_quote
, strlen (close_quote
));
355 const char *colorstr
= colorize_stop (pp_show_color (pp
));
356 obstack_grow (&buffer
->chunk_obstack
, colorstr
,
364 const char *errstr
= xstrerror (text
->err_no
);
365 obstack_grow (&buffer
->chunk_obstack
, errstr
, strlen (errstr
));
371 /* Handled in phase 2. Terminate the plain chunk here. */
372 obstack_1grow (&buffer
->chunk_obstack
, '\0');
373 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
374 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
381 argno
= strtoul (p
, &end
, 10) - 1;
383 gcc_assert (*p
== '$');
387 gcc_assert (!any_unnumbered
);
392 any_unnumbered
= true;
393 gcc_assert (!any_numbered
);
395 gcc_assert (argno
< PP_NL_ARGMAX
);
396 gcc_assert (!formatters
[argno
]);
397 formatters
[argno
] = &args
[chunk
];
400 obstack_1grow (&buffer
->chunk_obstack
, *p
);
403 while (strchr ("qwl+#", p
[-1]));
407 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
408 (where M == N + 1). */
413 obstack_1grow (&buffer
->chunk_obstack
, *p
);
416 while (ISDIGIT (p
[-1]));
417 gcc_assert (p
[-1] == 's');
421 gcc_assert (*p
== '*');
422 obstack_1grow (&buffer
->chunk_obstack
, '*');
428 unsigned int argno2
= strtoul (p
, &end
, 10) - 1;
430 gcc_assert (argno2
== argno
- 1);
431 gcc_assert (!any_unnumbered
);
432 gcc_assert (*p
== '$');
435 formatters
[argno2
] = formatters
[argno
];
439 gcc_assert (!any_numbered
);
440 formatters
[argno
+1] = formatters
[argno
];
443 gcc_assert (*p
== 's');
444 obstack_1grow (&buffer
->chunk_obstack
, 's');
451 obstack_1grow (&buffer
->chunk_obstack
, '\0');
452 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
453 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
456 obstack_1grow (&buffer
->chunk_obstack
, '\0');
457 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
458 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
461 /* Set output to the argument obstack, and switch line-wrapping and
463 buffer
->obstack
= &buffer
->chunk_obstack
;
464 old_wrapping_mode
= pp_set_verbatim_wrapping (pp
);
466 /* Second phase. Replace each formatter with the formatted text it
469 for (argno
= 0; formatters
[argno
]; argno
++)
477 /* We do not attempt to enforce any ordering on the modifier
480 for (p
= *formatters
[argno
];; p
++)
505 /* We don't support precision beyond that of "long long". */
506 gcc_assert (precision
< 2);
513 gcc_assert (!wide
|| precision
== 0);
517 pp_string (pp
, open_quote
);
518 pp_string (pp
, colorize_start (pp_show_color (pp
), "quote"));
524 pp_string (pp
, colorize_start (pp_show_color (pp
),
525 va_arg (*text
->args_ptr
,
530 pp_character (pp
, va_arg (*text
->args_ptr
, int));
536 pp_wide_integer (pp
, va_arg (*text
->args_ptr
, HOST_WIDE_INT
));
538 pp_integer_with_precision
539 (pp
, *text
->args_ptr
, precision
, int, "d");
544 pp_scalar (pp
, "%" HOST_WIDE_INT_PRINT
"o",
545 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
547 pp_integer_with_precision
548 (pp
, *text
->args_ptr
, precision
, unsigned, "o");
552 pp_string (pp
, va_arg (*text
->args_ptr
, const char *));
556 pp_pointer (pp
, va_arg (*text
->args_ptr
, void *));
561 pp_scalar (pp
, HOST_WIDE_INT_PRINT_UNSIGNED
,
562 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
564 pp_integer_with_precision
565 (pp
, *text
->args_ptr
, precision
, unsigned, "u");
570 pp_scalar (pp
, HOST_WIDE_INT_PRINT_HEX
,
571 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
573 pp_integer_with_precision
574 (pp
, *text
->args_ptr
, precision
, unsigned, "x");
582 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
583 (where M == N + 1). The format string should be verified
584 already from the first phase. */
589 n
= strtoul (p
, &end
, 10);
591 gcc_assert (*p
== 's');
595 gcc_assert (*p
== '*');
597 gcc_assert (*p
== 's');
598 n
= va_arg (*text
->args_ptr
, int);
600 /* This consumes a second entry in the formatters array. */
601 gcc_assert (formatters
[argno
] == formatters
[argno
+1]);
605 s
= va_arg (*text
->args_ptr
, const char *);
606 pp_append_text (pp
, s
, s
+ n
);
614 gcc_assert (pp_format_decoder (pp
));
615 ok
= pp_format_decoder (pp
) (pp
, text
, p
,
616 precision
, wide
, plus
, hash
);
623 pp_string (pp
, colorize_stop (pp_show_color (pp
)));
624 pp_string (pp
, close_quote
);
627 obstack_1grow (&buffer
->chunk_obstack
, '\0');
628 *formatters
[argno
] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
631 #ifdef ENABLE_CHECKING
632 for (; argno
< PP_NL_ARGMAX
; argno
++)
633 gcc_assert (!formatters
[argno
]);
636 /* Revert to normal obstack and wrapping mode. */
637 buffer
->obstack
= &buffer
->formatted_obstack
;
638 buffer
->line_length
= 0;
639 pp_wrapping_mode (pp
) = old_wrapping_mode
;
643 /* Formatting phase 3: output the chunks prepared by pp_format into PRETTY-PRINTER's output area, then release them. */
645 pp_output_formatted_text (pretty_printer
*pp
)
648 output_buffer
*buffer
= pp_buffer (pp
);
/* The chunk array most recently pushed by pp_format is the one to print. */
649 struct chunk_info
*chunk_array
= buffer
->cur_chunk_array
;
650 const char **args
= chunk_array
->args
;
/* pp_format switches output back to the formatted obstack and resets the
   line length before returning; check that nothing has disturbed that
   state since. */
652 gcc_assert (buffer
->obstack
== &buffer
->formatted_obstack
);
653 gcc_assert (buffer
->line_length
== 0);
655 /* This is the third phase; the first two phases are done in pp_format.
656 Now we actually print it. The chunk list ends at the first null entry. */
657 for (chunk
= 0; args
[chunk
]; chunk
++)
658 pp_string (pp
, args
[chunk
]);
660 /* Deallocate the chunk structure and everything after it (i.e. the
661 associated series of formatted strings). The previous chunk array, if
   any, becomes current again first. */
662 buffer
->cur_chunk_array
= chunk_array
->prev
;
663 obstack_free (&buffer
->chunk_obstack
, chunk_array
);
666 /* Helper subroutine of output_verbatim and verbatim. Do the appropriate
667 settings needed by BUFFER for a verbatim formatting. */
669 pp_format_verbatim (pretty_printer
*pp
, text_info
*text
)
671 /* Set verbatim mode. */
672 pp_wrapping_mode_t oldmode
= pp_set_verbatim_wrapping (pp
);
674 /* Do the actual formatting. */
675 pp_format (pp
, text
);
676 pp_output_formatted_text (pp
);
678 /* Restore previous settings. */
679 pp_wrapping_mode (pp
) = oldmode
;
682 /* Flush the content of BUFFER onto the attached stream. */
684 pp_flush (pretty_printer
*pp
)
686 pp_write_text_to_stream (pp
);
688 fflush (pp_buffer (pp
)->stream
);
691 /* Set the maximum number of characters per line PRETTY-PRINTER can
692 output in line-wrapping mode. A LENGTH value 0 suppresses line-wrapping. */
695 pp_set_line_maximum_length (pretty_printer
*pp
, int length
)
697 pp_line_cutoff (pp
) = length
;
698 pp_set_real_maximum_length (pp
);
701 /* Clear PRETTY-PRINTER output area text info. */
703 pp_clear_output_area (pretty_printer
*pp
)
/* Free starting at the base of the object currently being grown, which
   throws away the text accumulated in the active obstack. */
705 obstack_free (pp_buffer (pp
)->obstack
,
706 obstack_base (pp_buffer (pp
)->obstack
));
/* Nothing is pending any more, so the column count restarts at zero. */
707 pp_buffer (pp
)->line_length
= 0;
710 /* Set PREFIX for PRETTY-PRINTER. */
712 pp_set_prefix (pretty_printer
*pp
, const char *prefix
)
715 pp_set_real_maximum_length (pp
);
716 pp
->emitted_prefix
= false;
717 pp_indentation (pp
) = 0;
720 /* Free PRETTY-PRINTER's prefix, a previously malloc()'d string. */
722 pp_destroy_prefix (pretty_printer
*pp
)
724 if (pp
->prefix
!= NULL
)
726 free (CONST_CAST (char *, pp
->prefix
));
731 /* Write out PRETTY-PRINTER's prefix. */
733 pp_emit_prefix (pretty_printer
*pp
)
735 if (pp
->prefix
!= NULL
)
737 switch (pp_prefixing_rule (pp
))
740 case DIAGNOSTICS_SHOW_PREFIX_NEVER
:
743 case DIAGNOSTICS_SHOW_PREFIX_ONCE
:
744 if (pp
->emitted_prefix
)
749 pp_indentation (pp
) += 3;
752 case DIAGNOSTICS_SHOW_PREFIX_EVERY_LINE
:
754 int prefix_length
= strlen (pp
->prefix
);
755 pp_append_r (pp
, pp
->prefix
, prefix_length
);
756 pp
->emitted_prefix
= true;
763 /* Construct a PRETTY-PRINTER with PREFIX and of MAXIMUM_LENGTH
764 characters per line. */
766 pretty_printer::pretty_printer (const char *p
, int l
)
767 : buffer (new (XCNEW (output_buffer
)) output_buffer ()),
776 translate_identifiers (true),
779 pp_line_cutoff (this) = l
;
780 /* By default, we emit prefixes once per message. */
781 pp_prefixing_rule (this) = DIAGNOSTICS_SHOW_PREFIX_ONCE
;
782 pp_set_prefix (this, p
);
785 pretty_printer::~pretty_printer ()
787 buffer
->~output_buffer ();
791 /* Append a string delimited by START and END to the output area of
792 PRETTY-PRINTER. No line wrapping is done. However, if beginning a
793 new line then emit PRETTY-PRINTER's prefix and skip any leading
794 whitespace if appropriate. The caller must ensure that it is safe to do so. */
797 pp_append_text (pretty_printer
*pp
, const char *start
, const char *end
)
799 /* Emit prefix and skip whitespace if we're starting a new line. */
800 if (pp_buffer (pp
)->line_length
== 0)
803 if (pp_is_wrapping_line (pp
))
804 while (start
!= end
&& *start
== ' ')
807 pp_append_r (pp
, start
, end
- start
);
810 /* Finish constructing a NUL-terminated character string representing
811 the PRETTY-PRINTED text. */
813 pp_formatted_text (pretty_printer
*pp
)
/* Terminate the text accumulated so far. The byte is appended
   unconditionally, so every call grows the pending object by one NUL. */
815 obstack_1grow (pp_buffer (pp
)->obstack
, '\0');
/* pp_formatted_text_data yields the obstack base, i.e. the start of the
   object that is still being grown; the object is not finished here. */
816 return pp_formatted_text_data (pp
);
819 /* Return a pointer to the last character emitted in PRETTY-PRINTER's
820 output area. A NULL pointer means no character available. */
822 pp_last_position_in_text (const pretty_printer
*pp
)
824 const char *p
= NULL
;
825 struct obstack
*text
= pp_buffer (pp
)->obstack
;
827 if (obstack_base (text
) != obstack_next_free (text
))
828 p
= ((const char *) obstack_next_free (text
)) - 1;
832 /* Return the number of characters PRETTY-PRINTER can still accept to
833 make a full line. Meaningful only in line-wrapping mode. */
835 pp_remaining_character_count_for_line (pretty_printer
*pp
)
837 return pp
->maximum_length
- pp_buffer (pp
)->line_length
;
841 /* Format a message into BUFFER a la printf. */
843 pp_printf (pretty_printer
*pp
, const char *msg
, ...)
851 text
.format_spec
= msg
;
853 pp_format (pp
, &text
);
854 pp_output_formatted_text (pp
);
859 /* Output MESSAGE verbatim into BUFFER. */
861 pp_verbatim (pretty_printer
*pp
, const char *msg
, ...)
869 text
.format_spec
= msg
;
871 pp_format_verbatim (pp
, &text
);
877 /* Have PRETTY-PRINTER start a new line. */
879 pp_newline (pretty_printer
*pp
)
881 obstack_1grow (pp_buffer (pp
)->obstack
, '\n');
882 pp_needs_newline (pp
) = false;
883 pp_buffer (pp
)->line_length
= 0;
886 /* Have PRETTY-PRINTER add a CHARACTER. */
888 pp_character (pretty_printer
*pp
, int c
)
890 if (pp_is_wrapping_line (pp
)
891 && pp_remaining_character_count_for_line (pp
) <= 0)
897 obstack_1grow (pp_buffer (pp
)->obstack
, c
);
898 ++pp_buffer (pp
)->line_length
;
901 /* Append a STRING to the output area of PRETTY-PRINTER; the STRING may
902 be line-wrapped if in appropriate mode. */
904 pp_string (pretty_printer
*pp
, const char *str
)
906 pp_maybe_wrap_text (pp
, str
, str
+ (str
? strlen (str
) : 0));
909 /* Maybe print out a whitespace if needed. */
912 pp_maybe_space (pretty_printer
*pp
)
914 if (pp
->padding
!= pp_none
)
917 pp
->padding
= pp_none
;
921 // Add a newline to the pretty printer PP and flush formatted text.
924 pp_newline_and_flush (pretty_printer
*pp
)
928 pp_needs_newline (pp
) = false;
931 // Add a newline to the pretty printer PP, followed by indentation.
934 pp_newline_and_indent (pretty_printer
*pp
, int n
)
936 pp_indentation (pp
) += n
;
939 pp_needs_newline (pp
) = false;
942 // Add separator C, followed by a single whitespace.
945 pp_separate_with (pretty_printer
*pp
, char c
)
947 pp_character (pp
, c
);
952 /* The string starting at P has LEN (at least 1) bytes left; if they
953 start with a valid UTF-8 sequence, return the length of that
954 sequence and set *VALUE to the value of that sequence, and
955 otherwise return 0 and set *VALUE to (unsigned int) -1. */
static int
decode_utf8_char (const unsigned char *p, size_t len, unsigned int *value)
{
  /* Marker stored in *VALUE whenever no valid sequence starts at P.  */
  const unsigned int invalid = (unsigned int) -1;

  /* Callers promise at least one byte, but never read *P from an empty
     range: report it as invalid instead.  */
  if (len == 0)
    {
      *value = invalid;
      return 0;
    }

  unsigned int t = *p;

  /* A byte with the top bit clear is a complete one-byte sequence.  */
  if (!(t & 0x80))
    {
      *value = t;
      return 1;
    }

  /* The number of leading 1 bits in the first byte gives the length of
     the whole sequence.  */
  size_t utf8_len = 0;
  for (; t & 0x80; t <<= 1)
    utf8_len++;

  /* One leading 1 bit is a stray continuation byte, more than six is not
     a UTF-8 lead byte, and the sequence must fit in the LEN bytes left.  */
  if (utf8_len > len || utf8_len < 2 || utf8_len > 6)
    {
      *value = invalid;
      return 0;
    }

  /* Payload bits of the lead byte are those below the length marker.  */
  unsigned int ch = *p & ((1 << (7 - utf8_len)) - 1);
  for (size_t i = 1; i < utf8_len; i++)
    {
      unsigned int u = p[i];
      /* Every following byte must have the form 10xxxxxx.  */
      if ((u & 0xC0) != 0x80)
	{
	  *value = invalid;
	  return 0;
	}
      ch = (ch << 6) | (u & 0x3F);
    }

  /* Reject overlong encodings (a value that fits in a shorter sequence)
     and UTF-16 surrogate code points.  */
  if (   (ch <=      0x7F && utf8_len > 1)
      || (ch <=     0x7FF && utf8_len > 2)
      || (ch <=    0xFFFF && utf8_len > 3)
      || (ch <=  0x1FFFFF && utf8_len > 4)
      || (ch <= 0x3FFFFFF && utf8_len > 5)
      || (ch >= 0xD800 && ch <= 0xDFFF))
    {
      *value = invalid;
      return 0;
    }

  *value = ch;
  return (int) utf8_len;
}
1008 /* Allocator for identifier_to_locale and corresponding function to free memory. */
1011 void *(*identifier_to_locale_alloc
) (size_t) = xmalloc
;
1012 void (*identifier_to_locale_free
) (void *) = free
;
1014 /* Given IDENT, an identifier in the internal encoding, return a
1015 version of IDENT suitable for diagnostics in the locale character
1016 set: either IDENT itself, or a string, allocated using
1017 identifier_to_locale_alloc, converted to the locale character set
1018 and using escape sequences if not representable in the locale
1019 character set or containing control characters or invalid byte
1020 sequences. Existing backslashes in IDENT are not doubled, so the
1021 result may not uniquely specify the contents of an arbitrary byte
1022 sequence identifier. */
1025 identifier_to_locale (const char *ident
)
1027 const unsigned char *uid
= (const unsigned char *) ident
;
1028 size_t idlen
= strlen (ident
);
1029 bool valid_printable_utf8
= true;
1030 bool all_ascii
= true;
1033 for (i
= 0; i
< idlen
;)
1036 size_t utf8_len
= decode_utf8_char (&uid
[i
], idlen
- i
, &c
);
1037 if (utf8_len
== 0 || c
<= 0x1F || (c
>= 0x7F && c
<= 0x9F))
1039 valid_printable_utf8
= false;
1047 /* If IDENT contains invalid UTF-8 sequences (which may occur with
1048 attributes putting arbitrary byte sequences in identifiers), or
1049 control characters, we use octal escape sequences for all bytes
1050 outside printable ASCII. */
1051 if (!valid_printable_utf8
)
1053 char *ret
= (char *) identifier_to_locale_alloc (4 * idlen
+ 1);
1055 for (i
= 0; i
< idlen
; i
++)
1057 if (uid
[i
] > 0x1F && uid
[i
] < 0x7F)
1061 sprintf (p
, "\\%03o", uid
[i
]);
1069 /* Otherwise, if it is valid printable ASCII, or printable UTF-8
1070 with the locale character set being UTF-8, IDENT is used. */
1071 if (all_ascii
|| locale_utf8
)
1074 /* Otherwise IDENT is converted to the locale character set if
1076 #if defined ENABLE_NLS && defined HAVE_LANGINFO_CODESET && HAVE_ICONV
1077 if (locale_encoding
!= NULL
)
1079 iconv_t cd
= iconv_open (locale_encoding
, "UTF-8");
1080 bool conversion_ok
= true;
1082 if (cd
!= (iconv_t
) -1)
1084 size_t ret_alloc
= 4 * idlen
+ 1;
1087 /* Repeat the whole conversion process as needed with
1088 larger buffers so non-reversible transformations can
1089 always be detected. */
1090 ICONV_CONST
char *inbuf
= CONST_CAST (char *, ident
);
1092 size_t inbytesleft
= idlen
;
1093 size_t outbytesleft
= ret_alloc
- 1;
1096 ret
= (char *) identifier_to_locale_alloc (ret_alloc
);
1099 if (iconv (cd
, 0, 0, 0, 0) == (size_t) -1)
1101 conversion_ok
= false;
1105 iconv_ret
= iconv (cd
, &inbuf
, &inbytesleft
,
1106 &outbuf
, &outbytesleft
);
1107 if (iconv_ret
== (size_t) -1 || inbytesleft
!= 0)
1112 identifier_to_locale_free (ret
);
1118 conversion_ok
= false;
1122 else if (iconv_ret
!= 0)
1124 conversion_ok
= false;
1127 /* Return to initial shift state. */
1128 if (iconv (cd
, 0, 0, &outbuf
, &outbytesleft
) == (size_t) -1)
1133 identifier_to_locale_free (ret
);
1139 conversion_ok
= false;
1153 /* Otherwise, convert non-ASCII characters in IDENT to UCNs. */
1155 char *ret
= (char *) identifier_to_locale_alloc (10 * idlen
+ 1);
1157 for (i
= 0; i
< idlen
;)
1160 size_t utf8_len
= decode_utf8_char (&uid
[i
], idlen
- i
, &c
);
1165 sprintf (p
, "\\U%08x", c
);