1 /* Various declarations for language-independent pretty-print subroutines.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Gabriel Dos Reis <gdr@integrable-solutions.net>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
25 #include "pretty-print.h"
26 #include "diagnostic-color.h"
28 #include <new> // For placement-new.
34 // Default construct an output buffer.
// The visible member initializers value-initialize formatted_obstack and
// point the active `obstack' at it; formatted_obstack and chunk_obstack
// are then both set up with obstack_init.
// NOTE(review): the remaining member initializers and the braces of the
// body are not visible in this extract.
36 output_buffer::output_buffer ()
37 : formatted_obstack (),
39 obstack (&formatted_obstack
),
46 obstack_init (&formatted_obstack
);
47 obstack_init (&chunk_obstack
);
50 // Release resources owned by an output buffer at the end of lifetime.
// Both obstacks are released with obstack_free (..., NULL): chunk_obstack
// first, then formatted_obstack.
52 output_buffer::~output_buffer ()
54 obstack_free (&chunk_obstack
, NULL
);
55 obstack_free (&formatted_obstack
, NULL
);
59 /* Format an integer given by va_arg (ARG, type-specifier T) where
60 type-specifier is a precision modifier as indicated by PREC. F is
61 a string used to construct the appropriate format-specifier. */
// The three visible expansions read the argument as T, long T and
// long long T and print it through pp_scalar with the formats "%" F,
// "%l" F and "%" HOST_LONG_LONG_FORMAT F respectively.
// NOTE(review): the dispatch on PREC that selects among them is not
// visible in this extract.
62 #define pp_integer_with_precision(PP, ARG, PREC, T, F) \
67 pp_scalar (PP, "%" F, va_arg (ARG, T)); \
71 pp_scalar (PP, "%l" F, va_arg (ARG, long T)); \
75 pp_scalar (PP, "%" HOST_LONG_LONG_FORMAT F, va_arg (ARG, long long T)); \
84 /* Subroutine of pp_set_maximum_length. Set up PRETTY-PRINTER's
85 internal maximum characters per line. */
// With wrapping off, or with the prefix shown once or never, the maximum
// length is simply pp_line_cutoff (PP). Otherwise the prefix length is
// taken into account: if fewer than 32 columns would remain after the
// prefix, the maximum is raised to the cutoff plus 32.
87 pp_set_real_maximum_length (pretty_printer
*pp
)
89 /* If we're told not to wrap lines then do the obvious thing. In case
90 we'll emit prefix only once per message, it is appropriate
91 not to increase unnecessarily the line-length cut-off. */
92 if (!pp_is_wrapping_line (pp
)
93 || pp_prefixing_rule (pp
) == DIAGNOSTICS_SHOW_PREFIX_ONCE
94 || pp_prefixing_rule (pp
) == DIAGNOSTICS_SHOW_PREFIX_NEVER
)
95 pp
->maximum_length
= pp_line_cutoff (pp
);
// A null prefix counts as length 0.
98 int prefix_length
= pp
->prefix
? strlen (pp
->prefix
) : 0;
99 /* If the prefix is ridiculously too long, output at least
// NOTE(review): the block comment opened on the previous line is not
// closed in this extract - its last line is missing.
101 if (pp_line_cutoff (pp
) - prefix_length
< 32)
102 pp
->maximum_length
= pp_line_cutoff (pp
) + 32;
// NOTE(review): presumably the fall-back branch taken when the prefix
// leaves at least 32 columns; the `else' is not visible here - confirm.
104 pp
->maximum_length
= pp_line_cutoff (pp
);
108 /* Clear PRETTY-PRINTER's output state. */
// Resets the emitted-prefix flag to false and the indentation to 0.
110 pp_clear_state (pretty_printer
*pp
)
112 pp
->emitted_prefix
= false;
113 pp_indentation (pp
) = 0;
116 /* Flush the formatted text of PRETTY-PRINTER onto the attached stream. */
// The text is obtained with pp_formatted_text, written with fputs to
// pp_buffer (PP)->stream, and the output area is then cleared.
118 pp_write_text_to_stream (pretty_printer
*pp
)
120 const char *text
= pp_formatted_text (pp
);
121 fputs (text
, pp_buffer (pp
)->stream
);
122 pp_clear_output_area (pp
);
125 /* As pp_write_text_to_stream, but for GraphViz label output.
127 Flush the formatted text of pretty-printer PP onto the attached stream.
128 Replace characters in the text that have special meaning in a GraphViz .dot file.
131 This routine is not very fast, but it doesn't have to be as it is only
132 used by routines dumping intermediate representations in graph form. */
// FOR_RECORD: per the comment in the body, a pipe is only treated as
// special for record-shape nodes.
// NOTE(review): the per-character loop is not visible in this extract;
// only its comments, the newline replacement and the final clearing of
// the output area are.
135 pp_write_text_as_dot_label_to_stream (pretty_printer
*pp
, bool for_record
)
137 const char *text
= pp_formatted_text (pp
);
138 const char *p
= text
;
139 FILE *fp
= pp_buffer (pp
)->stream
;
145 /* Print newlines as a left-aligned newline. */
// Writes backslash, 'l', backslash and then a real newline character.
147 fputs ("\\l\\\n", fp
);
150 /* A pipe is only special for record-shape nodes. */
157 /* The following characters always have to be escaped
158 for use in labels. */
174 pp_clear_output_area (pp
);
177 /* Wrap a text delimited by START and END into PRETTY-PRINTER. */
// The visible code scans one run of non-blank, non-newline characters
// [START, P), compares its length with the room left on the current line,
// appends it with pp_append_text, and then tests whether the next
// character is a blank or a newline.
// NOTE(review): the enclosing loop and the actions taken on those tests
// are not visible in this extract.
179 pp_wrap_text (pretty_printer
*pp
, const char *start
, const char *end
)
181 bool wrapping_line
= pp_is_wrapping_line (pp
);
185 /* Dump anything bordered by whitespaces. */
187 const char *p
= start
;
// Advance P to the first blank, newline, or END.
188 while (p
!= end
&& !ISBLANK (*p
) && *p
!= '\n')
// The word does not fit when its length reaches the remaining count.
191 && p
- start
>= pp_remaining_character_count_for_line (pp
))
193 pp_append_text (pp
, start
, p
);
197 if (start
!= end
&& ISBLANK (*start
))
202 if (start
!= end
&& *start
== '\n')
210 /* Same as pp_wrap_text but wrap text only when in line-wrapping mode. */
// In wrapping mode the range [START, END) goes through pp_wrap_text.
// NOTE(review): the pp_append_text call below is presumably the `else'
// branch (that keyword is not visible in this extract) - confirm.
212 pp_maybe_wrap_text (pretty_printer
*pp
, const char *start
, const char *end
)
214 if (pp_is_wrapping_line (pp
))
215 pp_wrap_text (pp
, start
, end
);
217 pp_append_text (pp
, start
, end
);
220 /* Append to the output area of PRETTY-PRINTER a string specified by its
221 STARTing character and LENGTH. */
// Thin wrapper: forwards START and LENGTH to output_buffer_append_r on
// pp_buffer (PP).
223 pp_append_r (pretty_printer
*pp
, const char *start
, int length
)
225 output_buffer_append_r (pp_buffer (pp
), start
, length
);
228 /* Insert enough spaces into the output area of PRETTY-PRINTER to bring
229 the column position to the current indentation level, assuming that a
230 newline has just been written to the buffer. */
// N is the current indentation and the loop runs N times.
// NOTE(review): the loop body is not visible in this extract; per the
// comment above it presumably emits one space per iteration.
232 pp_indent (pretty_printer
*pp
)
234 int n
= pp_indentation (pp
);
237 for (i
= 0; i
< n
; ++i
)
241 /* The following format specifiers are recognized as being client independent:
242 %d, %i: (signed) integer in base ten.
243 %u: unsigned integer in base ten.
244 %o: unsigned integer in base eight.
245 %x: unsigned integer in base sixteen.
246 %ld, %li, %lo, %lu, %lx: long versions of the above.
247 %lld, %lli, %llo, %llu, %llx: long long versions.
248 %wd, %wi, %wo, %wu, %wx: HOST_WIDE_INT versions.
252 %r: if pp_show_color(pp), switch to color identified by const char *.
253 %R: if pp_show_color(pp), reset color.
254 %m: strerror(text->err_no) - does not consume a value from args_ptr.
258 %': apostrophe (should only be used in untranslated messages;
259 translations should use appropriate punctuation directly).
260 %.*s: a substring the length of which is specified by an argument
262 %Ns: likewise, but length specified as constant in the format string.
263 Flag 'q': quote formatted text (must come immediately after '%').
265 Arguments can be used sequentially, or through %N$ resp. *N$
266 notation Nth argument after the format string. If %N$ / *N$
267 notation is used, it must be used for all arguments, except %m, %%,
268 %<, %> and %', which may not have a number, as they do not consume
269 an argument. When %M$.*N$s is used, M must be N + 1. (This may
270 also be written %M$.*s, provided N is not otherwise used.) The
271 format string must have conversion specifiers with argument numbers
272 1 up to highest argument; each argument may only be used once.
273 A format string can have at most 30 arguments. */
275 /* Formatting phases 1 and 2: render TEXT->format_spec plus
276 TEXT->args_ptr into a series of chunks in the args[] of a new chunk_info
277 linked into pp_buffer (PP)->cur_chunk_array. Phase 3 is in pp_output_formatted_text. */
// Phase 1 splits the format string into NUL-terminated chunks on the
// chunk obstack and records, per argument number, which chunk holds its
// format specifier (formatters[]). Phase 2 then replaces each such chunk
// with the formatted argument text.
// NOTE(review): many lines of this function (braces, switch labels, some
// declarations) are not visible in this extract; the comments added below
// describe only the statements that are.
280 pp_format (pretty_printer
*pp
, text_info
*text
)
282 output_buffer
*buffer
= pp_buffer (pp
);
285 struct chunk_info
*new_chunk_array
;
287 unsigned int curarg
= 0, chunk
= 0, argno
;
288 pp_wrapping_mode_t old_wrapping_mode
;
289 bool any_unnumbered
= false, any_numbered
= false;
// formatters[N] points at the args[] slot that holds argument N's
// format specifier.
290 const char **formatters
[PP_NL_ARGMAX
];
292 /* Allocate a new chunk structure. */
293 new_chunk_array
= XOBNEW (&buffer
->chunk_obstack
, struct chunk_info
);
// Push it on the buffer's list of chunk arrays.
294 new_chunk_array
->prev
= buffer
->cur_chunk_array
;
295 buffer
->cur_chunk_array
= new_chunk_array
;
296 args
= new_chunk_array
->args
;
298 /* Formatting phase 1: split up TEXT->format_spec into chunks in
299 pp_buffer (PP)->args[]. Even-numbered chunks are to be output
300 verbatim, odd-numbered chunks are format specifiers.
301 %m, %%, %<, %>, and %' are replaced with the appropriate text at
// NOTE(review): the block comment opened above is not closed in this
// extract - its last line is missing.
// Start with no formatter recorded for any argument.
304 memset (formatters
, 0, sizeof formatters
);
306 for (p
= text
->format_spec
; *p
; )
// Copy literal text up to the next '%' into the current chunk.
308 while (*p
!= '\0' && *p
!= '%')
310 obstack_1grow (&buffer
->chunk_obstack
, *p
);
323 obstack_1grow (&buffer
->chunk_obstack
, '%');
329 obstack_grow (&buffer
->chunk_obstack
,
330 open_quote
, strlen (open_quote
));
332 = colorize_start (pp_show_color (pp
), "quote");
333 obstack_grow (&buffer
->chunk_obstack
, colorstr
, strlen (colorstr
));
340 const char *colorstr
= colorize_stop (pp_show_color (pp
));
341 obstack_grow (&buffer
->chunk_obstack
, colorstr
, strlen (colorstr
));
345 obstack_grow (&buffer
->chunk_obstack
,
346 close_quote
, strlen (close_quote
));
352 const char *colorstr
= colorize_stop (pp_show_color (pp
));
353 obstack_grow (&buffer
->chunk_obstack
, colorstr
,
361 const char *errstr
= xstrerror (text
->err_no
);
362 obstack_grow (&buffer
->chunk_obstack
, errstr
, strlen (errstr
));
368 /* Handled in phase 2. Terminate the plain chunk here. */
369 obstack_1grow (&buffer
->chunk_obstack
, '\0');
370 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
371 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
// Numbered argument: the number in the format string is 1-based, the
// index into formatters[] is 0-based, and it must be followed by '$'.
378 argno
= strtoul (p
, &end
, 10) - 1;
380 gcc_assert (*p
== '$');
// Numbered and unnumbered arguments must not be mixed.
384 gcc_assert (!any_unnumbered
);
389 any_unnumbered
= true;
390 gcc_assert (!any_numbered
);
// Each argument slot must be in range and may be claimed only once.
392 gcc_assert (argno
< PP_NL_ARGMAX
);
393 gcc_assert (!formatters
[argno
]);
394 formatters
[argno
] = &args
[chunk
];
397 obstack_1grow (&buffer
->chunk_obstack
, *p
);
400 while (strchr ("qwl+#", p
[-1]));
404 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
405 (where M == N + 1). */
410 obstack_1grow (&buffer
->chunk_obstack
, *p
);
413 while (ISDIGIT (p
[-1]));
414 gcc_assert (p
[-1] == 's');
418 gcc_assert (*p
== '*');
419 obstack_1grow (&buffer
->chunk_obstack
, '*');
// Numbered length argument: it must be the argument just before the
// string argument, and shares that argument's chunk.
425 unsigned int argno2
= strtoul (p
, &end
, 10) - 1;
427 gcc_assert (argno2
== argno
- 1);
428 gcc_assert (!any_unnumbered
);
429 gcc_assert (*p
== '$');
432 formatters
[argno2
] = formatters
[argno
];
// Unnumbered length argument: the following slot is claimed by the
// same chunk.
436 gcc_assert (!any_numbered
);
437 formatters
[argno
+1] = formatters
[argno
];
440 gcc_assert (*p
== 's');
441 obstack_1grow (&buffer
->chunk_obstack
, 's');
448 obstack_1grow (&buffer
->chunk_obstack
, '\0');
449 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
450 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
453 obstack_1grow (&buffer
->chunk_obstack
, '\0');
454 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
455 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
458 /* Set output to the argument obstack, and switch line-wrapping and
// NOTE(review): the block comment opened above is not closed in this
// extract - its last line is missing.
// The previous wrapping mode is saved so it can be restored at the end.
460 buffer
->obstack
= &buffer
->chunk_obstack
;
461 old_wrapping_mode
= pp_set_verbatim_wrapping (pp
);
463 /* Second phase. Replace each formatter with the formatted text it
// Walk the formatters in argument order until the first empty slot.
466 for (argno
= 0; formatters
[argno
]; argno
++)
474 /* We do not attempt to enforce any ordering on the modifier
477 for (p
= *formatters
[argno
];; p
++)
502 /* We don't support precision beyond that of "long long". */
503 gcc_assert (precision
< 2);
510 gcc_assert (!wide
|| precision
== 0);
514 pp_string (pp
, open_quote
);
515 pp_string (pp
, colorize_start (pp_show_color (pp
), "quote"));
521 pp_string (pp
, colorize_start (pp_show_color (pp
),
522 va_arg (*text
->args_ptr
,
527 pp_character (pp
, va_arg (*text
->args_ptr
, int));
533 pp_wide_integer (pp
, va_arg (*text
->args_ptr
, HOST_WIDE_INT
));
535 pp_integer_with_precision
536 (pp
, *text
->args_ptr
, precision
, int, "d");
541 pp_scalar (pp
, "%" HOST_WIDE_INT_PRINT
"o",
542 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
544 pp_integer_with_precision
545 (pp
, *text
->args_ptr
, precision
, unsigned, "o");
549 pp_string (pp
, va_arg (*text
->args_ptr
, const char *));
553 pp_pointer (pp
, va_arg (*text
->args_ptr
, void *));
558 pp_scalar (pp
, HOST_WIDE_INT_PRINT_UNSIGNED
,
559 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
561 pp_integer_with_precision
562 (pp
, *text
->args_ptr
, precision
, unsigned, "u");
567 pp_scalar (pp
, HOST_WIDE_INT_PRINT_HEX
,
568 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
570 pp_integer_with_precision
571 (pp
, *text
->args_ptr
, precision
, unsigned, "x");
579 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
580 (where M == N + 1). The format string should be verified
581 already from the first phase. */
// Length given as a constant in the format string.
586 n
= strtoul (p
, &end
, 10);
588 gcc_assert (*p
== 's');
// Length taken from the argument list.
592 gcc_assert (*p
== '*');
594 gcc_assert (*p
== 's');
595 n
= va_arg (*text
->args_ptr
, int);
597 /* This consumes a second entry in the formatters array. */
598 gcc_assert (formatters
[argno
] == formatters
[argno
+1]);
// Append exactly N characters of the string argument.
602 s
= va_arg (*text
->args_ptr
, const char *);
603 pp_append_text (pp
, s
, s
+ n
);
// Hand the specifier to PP's format decoder, which must be set.
611 gcc_assert (pp_format_decoder (pp
));
612 ok
= pp_format_decoder (pp
) (pp
, text
, p
,
613 precision
, wide
, plus
, hash
);
620 pp_string (pp
, colorize_stop (pp_show_color (pp
)));
621 pp_string (pp
, close_quote
);
// Terminate the formatted text and store it back into the chunk slot
// that held the format specifier.
624 obstack_1grow (&buffer
->chunk_obstack
, '\0');
625 *formatters
[argno
] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
// With checking enabled, verify that no formatter is recorded past the
// first empty slot.
628 #ifdef ENABLE_CHECKING
629 for (; argno
< PP_NL_ARGMAX
; argno
++)
630 gcc_assert (!formatters
[argno
]);
633 /* Revert to normal obstack and wrapping mode. */
634 buffer
->obstack
= &buffer
->formatted_obstack
;
635 buffer
->line_length
= 0;
636 pp_wrapping_mode (pp
) = old_wrapping_mode
;
640 /* Output the text prepared by pp_format (formatting phase 3). */
// Works on the buffer's current chunk array: every chunk up to the first
// null entry is printed with pp_string, then the array is unlinked and
// released from the chunk obstack.
642 pp_output_formatted_text (pretty_printer
*pp
)
645 output_buffer
*buffer
= pp_buffer (pp
);
646 struct chunk_info
*chunk_array
= buffer
->cur_chunk_array
;
647 const char **args
= chunk_array
->args
;
// Output must currently go to the formatted obstack, at the start of a
// line.
649 gcc_assert (buffer
->obstack
== &buffer
->formatted_obstack
);
650 gcc_assert (buffer
->line_length
== 0);
652 /* This is a third phase, first 2 phases done in pp_format.
653 Now we actually print it. */
654 for (chunk
= 0; args
[chunk
]; chunk
++)
655 pp_string (pp
, args
[chunk
]);
657 /* Deallocate the chunk structure and everything after it (i.e. the
658 associated series of formatted strings). */
659 buffer
->cur_chunk_array
= chunk_array
->prev
;
660 obstack_free (&buffer
->chunk_obstack
, chunk_array
);
663 /* Helper subroutine of output_verbatim and verbatim. Do the appropriate
664 settings needed by BUFFER for a verbatim formatting. */
// Switches PP to verbatim wrapping, runs pp_format followed by
// pp_output_formatted_text on TEXT, then restores the saved wrapping
// mode. pp_verbatim in this file calls it.
666 pp_format_verbatim (pretty_printer
*pp
, text_info
*text
)
668 /* Set verbatim mode. */
669 pp_wrapping_mode_t oldmode
= pp_set_verbatim_wrapping (pp
);
671 /* Do the actual formatting. */
672 pp_format (pp
, text
);
673 pp_output_formatted_text (pp
);
675 /* Restore previous settings. */
676 pp_wrapping_mode (pp
) = oldmode
;
679 /* Flush the content of BUFFER onto the attached stream. This
680 function does nothing unless pp->buffer->flush_p. */
// NOTE(review): the statement guarded by the flush_p test is not visible
// in this extract; per the comment above it is presumably an early
// return.
682 pp_flush (pretty_printer
*pp
)
685 if (!pp
->buffer
->flush_p
)
// Write the pending text, then flush the stdio stream itself.
687 pp_write_text_to_stream (pp
);
688 fflush (pp_buffer (pp
)->stream
);
691 /* Flush the content of BUFFER onto the attached stream independently
692 of the value of pp->buffer->flush_p. */
// Same two steps as pp_flush, without the flush_p test.
694 pp_really_flush (pretty_printer
*pp
)
697 pp_write_text_to_stream (pp
);
698 fflush (pp_buffer (pp
)->stream
);
701 /* Sets the number of maximum characters per line PRETTY-PRINTER can
702 output in line-wrapping mode. A LENGTH value 0 suppresses line-wrapping. */
// Stores LENGTH as PP's line cutoff, then recomputes the effective
// maximum length with pp_set_real_maximum_length.
705 pp_set_line_maximum_length (pretty_printer
*pp
, int length
)
707 pp_line_cutoff (pp
) = length
;
708 pp_set_real_maximum_length (pp
);
711 /* Clear PRETTY-PRINTER output area text info. */
// Calls obstack_free on the buffer's active obstack with that obstack's
// current obstack_base, then resets line_length to 0.
713 pp_clear_output_area (pretty_printer
*pp
)
715 obstack_free (pp_buffer (pp
)->obstack
,
716 obstack_base (pp_buffer (pp
)->obstack
));
717 pp_buffer (pp
)->line_length
= 0;
720 /* Set PREFIX for PRETTY-PRINTER. */
// After the prefix changes, the real maximum length is recomputed, the
// emitted-prefix flag is cleared and the indentation is reset to 0.
// NOTE(review): the statement that stores PREFIX into PP is not visible
// in this extract.
722 pp_set_prefix (pretty_printer
*pp
, const char *prefix
)
725 pp_set_real_maximum_length (pp
);
726 pp
->emitted_prefix
= false;
727 pp_indentation (pp
) = 0;
730 /* Free PRETTY-PRINTER's prefix, a previously malloc()'d string. */
// Does nothing for a null prefix. CONST_CAST drops the const qualifier
// so the pointer can be passed to free.
// NOTE(review): whether the prefix pointer is reset afterwards is not
// visible in this extract.
732 pp_destroy_prefix (pretty_printer
*pp
)
734 if (pp
->prefix
!= NULL
)
736 free (CONST_CAST (char *, pp
->prefix
));
741 /* Write out PRETTY-PRINTER's prefix. */
// Only acts when a prefix is set, and dispatches on the prefixing rule.
// NOTE(review): break and fall-through statements between the cases are
// not visible in this extract, so the exact flow between the ONCE and
// EVERY_LINE cases cannot be confirmed from here.
743 pp_emit_prefix (pretty_printer
*pp
)
745 if (pp
->prefix
!= NULL
)
747 switch (pp_prefixing_rule (pp
))
750 case DIAGNOSTICS_SHOW_PREFIX_NEVER
:
753 case DIAGNOSTICS_SHOW_PREFIX_ONCE
:
// The already-emitted case is tested here; its body is not visible.
754 if (pp
->emitted_prefix
)
759 pp_indentation (pp
) += 3;
762 case DIAGNOSTICS_SHOW_PREFIX_EVERY_LINE
:
// Append the prefix verbatim and remember that it has been emitted.
764 int prefix_length
= strlen (pp
->prefix
);
765 pp_append_r (pp
, pp
->prefix
, prefix_length
);
766 pp
->emitted_prefix
= true;
773 /* Construct a PRETTY-PRINTER with PREFIX and of MAXIMUM_LENGTH
774 characters per line. */
// P is the prefix and L the line cutoff. The output buffer is created by
// placement-new into storage obtained from XCNEW.
// NOTE(review): most member initializers are not visible in this extract.
776 pretty_printer::pretty_printer (const char *p
, int l
)
777 : buffer (new (XCNEW (output_buffer
)) output_buffer ()),
786 translate_identifiers (true),
789 pp_line_cutoff (this) = l
;
790 /* By default, we emit prefixes once per message. */
791 pp_prefixing_rule (this) = DIAGNOSTICS_SHOW_PREFIX_ONCE
;
792 pp_set_prefix (this, p
);
// Runs the output buffer's destructor explicitly; the buffer was created
// with placement-new in the constructor.
// NOTE(review): the release of the buffer's storage is not visible in
// this extract.
795 pretty_printer::~pretty_printer ()
797 buffer
->~output_buffer ();
801 /* Append a string delimited by START and END to the output area of
802 PRETTY-PRINTER. No line wrapping is done. However, if beginning a
803 new line then emit PRETTY-PRINTER's prefix and skip any leading
804 whitespace if appropriate. The caller must ensure that it is safe to do so. */
// At the start of a line (line_length == 0), and only in wrapping mode,
// leading ' ' characters of [START, END) are tested for skipping; the
// remaining range is appended with pp_append_r.
// NOTE(review): the prefix emission and the body of the skipping loop
// are not visible in this extract.
807 pp_append_text (pretty_printer
*pp
, const char *start
, const char *end
)
809 /* Emit prefix and skip whitespace if we're starting a new line. */
810 if (pp_buffer (pp
)->line_length
== 0)
813 if (pp_is_wrapping_line (pp
))
814 while (start
!= end
&& *start
== ' ')
817 pp_append_r (pp
, start
, end
- start
);
820 /* Finishes constructing a NULL-terminated character string representing
821 the PRETTY-PRINTED text. */
// Thin wrapper around output_buffer_formatted_text on pp_buffer (PP).
823 pp_formatted_text (pretty_printer
*pp
)
825 return output_buffer_formatted_text (pp_buffer (pp
));
828 /* Return a pointer to the last character emitted in PRETTY-PRINTER's
829 output area. A NULL pointer means no character available. */
// Thin wrapper around output_buffer_last_position_in_text on
// pp_buffer (PP).
831 pp_last_position_in_text (const pretty_printer
*pp
)
833 return output_buffer_last_position_in_text (pp_buffer (pp
));
836 /* Return the amount of characters PRETTY-PRINTER can accept to
837 make a full line. Meaningful only in line-wrapping mode. */
// Computed as maximum_length minus the buffer's current line_length.
// pp_character in this file tests the result with `<= 0'.
839 pp_remaining_character_count_for_line (pretty_printer
*pp
)
841 return pp
->maximum_length
- pp_buffer (pp
)->line_length
;
845 /* Format a message into BUFFER a la printf. */
// MSG becomes the format_spec of a local text_info, which is run through
// pp_format and then pp_output_formatted_text.
// NOTE(review): the declaration of the text_info and the variable
// argument handling are not visible in this extract.
847 pp_printf (pretty_printer
*pp
, const char *msg
, ...)
855 text
.format_spec
= msg
;
856 pp_format (pp
, &text
);
857 pp_output_formatted_text (pp
);
862 /* Output MESSAGE verbatim into BUFFER. */
// MSG becomes the format_spec of a local text_info, which is handed to
// pp_format_verbatim.
// NOTE(review): the declaration of the text_info and the variable
// argument handling are not visible in this extract.
864 pp_verbatim (pretty_printer
*pp
, const char *msg
, ...)
872 text
.format_spec
= msg
;
873 pp_format_verbatim (pp
, &text
);
879 /* Have PRETTY-PRINTER start a new line. */
// Appends a newline character to the active obstack, clears the
// needs-newline flag and resets the line length to 0.
881 pp_newline (pretty_printer
*pp
)
883 obstack_1grow (pp_buffer (pp
)->obstack
, '\n');
884 pp_needs_newline (pp
) = false;
885 pp_buffer (pp
)->line_length
= 0;
888 /* Have PRETTY-PRINTER add a CHARACTER. */
// In wrapping mode, a line with no remaining room is detected first.
// NOTE(review): the action taken in that case is not visible in this
// extract.
// The character is then appended and the line length incremented.
890 pp_character (pretty_printer
*pp
, int c
)
892 if (pp_is_wrapping_line (pp
)
893 && pp_remaining_character_count_for_line (pp
) <= 0)
899 obstack_1grow (pp_buffer (pp
)->obstack
, c
);
900 ++pp_buffer (pp
)->line_length
;
903 /* Append a STRING to the output area of PRETTY-PRINTER; the STRING may
904 be line-wrapped if in appropriate mode. */
// A null STR yields an empty range: the end pointer is STR + 0, so
// strlen is never called on a null pointer.
906 pp_string (pretty_printer
*pp
, const char *str
)
908 pp_maybe_wrap_text (pp
, str
, str
+ (str
? strlen (str
) : 0));
911 /* Maybe print out a whitespace if needed. */
// Acts only when PP's padding is not pp_none, and leaves the padding set
// to pp_none.
// NOTE(review): the statement that emits the whitespace is not visible
// in this extract.
914 pp_maybe_space (pretty_printer
*pp
)
916 if (pp
->padding
!= pp_none
)
919 pp
->padding
= pp_none
;
923 // Add a newline to the pretty printer PP and flush formatted text.
// NOTE(review): only the clearing of the needs-newline flag is visible
// in this extract; the newline and flush calls are not.
926 pp_newline_and_flush (pretty_printer
*pp
)
930 pp_needs_newline (pp
) = false;
933 // Add a newline to the pretty printer PP, followed by indentation.
// N is added to PP's current indentation (it is a delta, not an absolute
// level), and the needs-newline flag is cleared.
// NOTE(review): the newline and indent calls themselves are not visible
// in this extract.
936 pp_newline_and_indent (pretty_printer
*pp
, int n
)
938 pp_indentation (pp
) += n
;
941 pp_needs_newline (pp
) = false;
944 // Add separator C, followed by a single whitespace.
// NOTE(review): only the emission of C is visible in this extract; the
// statement that adds the whitespace is not.
947 pp_separate_with (pretty_printer
*pp
, char c
)
949 pp_character (pp
, c
);
954 /* The string starting at P has LEN (at least 1) bytes left; if they
955 start with a valid UTF-8 sequence, return the length of that
956 sequence and set *VALUE to the value of that sequence, and
957 otherwise return 0 and set *VALUE to (unsigned int) -1. */
// NOTE(review): several lines (declarations, return statements, the
// single-byte case) are not visible in this extract; the comments added
// below describe only the visible checks.
960 decode_utf8_char (const unsigned char *p
, size_t len
, unsigned int *value
)
// Shift the lead byte left while its top bit is set, i.e. once per
// leading 1 bit; presumably utf8_len is counted in the loop body, which
// is not visible here.
971 for (t
= *p
; t
& 0x80; t
<<= 1)
// Reject sequences longer than the remaining input, and lengths outside
// 2..6.
974 if (utf8_len
> len
|| utf8_len
< 2 || utf8_len
> 6)
976 *value
= (unsigned int) -1;
// Keep only the low (7 - utf8_len) bits of the lead byte.
979 ch
= *p
& ((1 << (7 - utf8_len
)) - 1);
980 for (i
= 1; i
< utf8_len
; i
++)
982 unsigned int u
= p
[i
];
// Every following byte must have the bit pattern 10xxxxxx.
983 if ((u
& 0xC0) != 0x80)
985 *value
= (unsigned int) -1;
// Append its low six bits to the value.
988 ch
= (ch
<< 6) | (u
& 0x3F);
// Reject values that would fit in a shorter sequence, and the range
// 0xD800..0xDFFF.
990 if ( (ch
<= 0x7F && utf8_len
> 1)
991 || (ch
<= 0x7FF && utf8_len
> 2)
992 || (ch
<= 0xFFFF && utf8_len
> 3)
993 || (ch
<= 0x1FFFFF && utf8_len
> 4)
994 || (ch
<= 0x3FFFFFF && utf8_len
> 5)
995 || (ch
>= 0xD800 && ch
<= 0xDFFF))
997 *value
= (unsigned int) -1;
1010 /* Allocator for identifier_to_locale and corresponding function to free memory. */
// Function pointers used by identifier_to_locale to obtain and release
// result buffers; they default to xmalloc and free.
1013 void *(*identifier_to_locale_alloc
) (size_t) = xmalloc
;
1014 void (*identifier_to_locale_free
) (void *) = free
;
1016 /* Given IDENT, an identifier in the internal encoding, return a
1017 version of IDENT suitable for diagnostics in the locale character
1018 set: either IDENT itself, or a string, allocated using
1019 identifier_to_locale_alloc, converted to the locale character set
1020 and using escape sequences if not representable in the locale
1021 character set or containing control characters or invalid byte
1022 sequences. Existing backslashes in IDENT are not doubled, so the
1023 result may not uniquely specify the contents of an arbitrary byte
1024 sequence identifier. */
1027 identifier_to_locale (const char *ident
)
1029 const unsigned char *uid
= (const unsigned char *) ident
;
1030 size_t idlen
= strlen (ident
);
1031 bool valid_printable_utf8
= true;
1032 bool all_ascii
= true;
1035 for (i
= 0; i
< idlen
;)
1038 size_t utf8_len
= decode_utf8_char (&uid
[i
], idlen
- i
, &c
);
1039 if (utf8_len
== 0 || c
<= 0x1F || (c
>= 0x7F && c
<= 0x9F))
1041 valid_printable_utf8
= false;
1049 /* If IDENT contains invalid UTF-8 sequences (which may occur with
1050 attributes putting arbitrary byte sequences in identifiers), or
1051 control characters, we use octal escape sequences for all bytes
1052 outside printable ASCII. */
1053 if (!valid_printable_utf8
)
1055 char *ret
= (char *) identifier_to_locale_alloc (4 * idlen
+ 1);
1057 for (i
= 0; i
< idlen
; i
++)
1059 if (uid
[i
] > 0x1F && uid
[i
] < 0x7F)
1063 sprintf (p
, "\\%03o", uid
[i
]);
1071 /* Otherwise, if it is valid printable ASCII, or printable UTF-8
1072 with the locale character set being UTF-8, IDENT is used. */
1073 if (all_ascii
|| locale_utf8
)
1076 /* Otherwise IDENT is converted to the locale character set if
1078 #if defined ENABLE_NLS && defined HAVE_LANGINFO_CODESET && HAVE_ICONV
1079 if (locale_encoding
!= NULL
)
1081 iconv_t cd
= iconv_open (locale_encoding
, "UTF-8");
1082 bool conversion_ok
= true;
1084 if (cd
!= (iconv_t
) -1)
1086 size_t ret_alloc
= 4 * idlen
+ 1;
1089 /* Repeat the whole conversion process as needed with
1090 larger buffers so non-reversible transformations can
1091 always be detected. */
1092 ICONV_CONST
char *inbuf
= CONST_CAST (char *, ident
);
1094 size_t inbytesleft
= idlen
;
1095 size_t outbytesleft
= ret_alloc
- 1;
1098 ret
= (char *) identifier_to_locale_alloc (ret_alloc
);
1101 if (iconv (cd
, 0, 0, 0, 0) == (size_t) -1)
1103 conversion_ok
= false;
1107 iconv_ret
= iconv (cd
, &inbuf
, &inbytesleft
,
1108 &outbuf
, &outbytesleft
);
1109 if (iconv_ret
== (size_t) -1 || inbytesleft
!= 0)
1114 identifier_to_locale_free (ret
);
1120 conversion_ok
= false;
1124 else if (iconv_ret
!= 0)
1126 conversion_ok
= false;
1129 /* Return to initial shift state. */
1130 if (iconv (cd
, 0, 0, &outbuf
, &outbytesleft
) == (size_t) -1)
1135 identifier_to_locale_free (ret
);
1141 conversion_ok
= false;
1155 /* Otherwise, convert non-ASCII characters in IDENT to UCNs. */
1157 char *ret
= (char *) identifier_to_locale_alloc (10 * idlen
+ 1);
1159 for (i
= 0; i
< idlen
;)
1162 size_t utf8_len
= decode_utf8_char (&uid
[i
], idlen
- i
, &c
);
1167 sprintf (p
, "\\U%08x", c
);