1 /* Various declarations for language-independent pretty-print subroutines.
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Gabriel Dos Reis <gdr@integrable-solutions.net>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
25 #include "pretty-print.h"
26 #include "diagnostic-color.h"
28 #include <new> // For placement-new.
34 /* Overwrite the given location/range within this text_info's rich_location.
35 For use e.g. when implementing "+" in client format decoders.
IDX, LOC and SHOW_CARET_P are passed straight through to
m_richloc->set_range together with line_table. m_richloc is asserted
to be non-null with gcc_checking_assert before it is used. */
38 text_info::set_location (unsigned int idx
, location_t loc
, bool show_caret_p
)
40 gcc_checking_assert (m_richloc
);
41 m_richloc
->set_range (line_table
, idx
, loc
, show_caret_p
);
// Return a location taken from this text_info's rich_location.
// Index 0 yields m_richloc->get_loc (); the only other visible return is
// UNKNOWN_LOCATION, so any nonzero INDEX_OF_LOCATION appears to map to it.
// m_richloc is asserted to be non-null with gcc_checking_assert.
45 text_info::get_location (unsigned int index_of_location
) const
47 gcc_checking_assert (m_richloc
);
49 if (index_of_location
== 0)
50 return m_richloc
->get_loc ();
52 return UNKNOWN_LOCATION
;
55 // Default construct an output buffer.
// The visible initializers value-initialize formatted_obstack and point
// the obstack member at it; the body then runs obstack_init on both
// formatted_obstack and chunk_obstack. Initializers for the remaining
// members are not visible in this excerpt.
57 output_buffer::output_buffer ()
58 : formatted_obstack (),
60 obstack (&formatted_obstack
),
67 obstack_init (&formatted_obstack
);
68 obstack_init (&chunk_obstack
);
71 // Release resources owned by an output buffer at the end of lifetime.
// Both obstacks are passed to obstack_free with a NULL object argument:
// chunk_obstack first, then formatted_obstack.
73 output_buffer::~output_buffer ()
75 obstack_free (&chunk_obstack
, NULL
);
76 obstack_free (&formatted_obstack
, NULL
);
80 /* Format an integer given by va_arg (ARG, type-specifier T) where
81 type-specifier is a precision modifier as indicated by PREC. F is
82 a string used to construct the appropriate format-specifier.
The three visible expansions hand pp_scalar a value of type T with
format "%" F, of type long T with "%l" F, and of type long long T with
"%" HOST_LONG_LONG_FORMAT F. The dispatch on PREC that selects between
them is not visible in this excerpt. */
83 #define pp_integer_with_precision(PP, ARG, PREC, T, F) \
88 pp_scalar (PP, "%" F, va_arg (ARG, T)); \
92 pp_scalar (PP, "%l" F, va_arg (ARG, long T)); \
96 pp_scalar (PP, "%" HOST_LONG_LONG_FORMAT F, va_arg (ARG, long long T)); \
105 /* Subroutine of pp_set_maximum_length. Set up PRETTY-PRINTER's
106 internal maximum characters per line. */
// Within this file it is called from pp_set_line_maximum_length and
// pp_set_prefix.
// Visible behaviour: when PP is not wrapping lines, or its prefixing rule
// is DIAGNOSTICS_SHOW_PREFIX_ONCE or DIAGNOSTICS_SHOW_PREFIX_NEVER,
// maximum_length is simply the line cutoff. Otherwise the prefix length
// is measured (0 when there is no prefix) and, if fewer than 32 columns
// would remain after the prefix, maximum_length becomes cutoff + 32.
// NOTE(review): the else/brace lines are missing from this excerpt, so the
// pairing of the last two assignments with their branches is inferred.
108 pp_set_real_maximum_length (pretty_printer
*pp
)
110 /* If we're told not to wrap lines then do the obvious thing. In case
111 we'll emit prefix only once per message, it is appropriate
112 not to increase unnecessarily the line-length cut-off. */
113 if (!pp_is_wrapping_line (pp
)
114 || pp_prefixing_rule (pp
) == DIAGNOSTICS_SHOW_PREFIX_ONCE
115 || pp_prefixing_rule (pp
) == DIAGNOSTICS_SHOW_PREFIX_NEVER
)
116 pp
->maximum_length
= pp_line_cutoff (pp
);
119 int prefix_length
= pp
->prefix
? strlen (pp
->prefix
) : 0;
120 /* If the prefix is ridiculously too long, output at least
122 if (pp_line_cutoff (pp
) - prefix_length
< 32)
123 pp
->maximum_length
= pp_line_cutoff (pp
) + 32;
125 pp
->maximum_length
= pp_line_cutoff (pp
);
129 /* Clear PRETTY-PRINTER's output state. */
// The visible statements reset the emitted_prefix flag to false and the
// indentation of PP to 0.
131 pp_clear_state (pretty_printer
*pp
)
133 pp
->emitted_prefix
= false;
134 pp_indentation (pp
) = 0;
137 /* Flush the formatted text of PRETTY-PRINTER onto the attached stream. */
// The formatted text is written with fputs to pp_buffer (pp)->stream and
// the output area is then cleared. The visible code does not call fflush;
// pp_flush and pp_really_flush do that after calling this function.
139 pp_write_text_to_stream (pretty_printer
*pp
)
141 const char *text
= pp_formatted_text (pp
);
142 fputs (text
, pp_buffer (pp
)->stream
);
143 pp_clear_output_area (pp
);
146 /* As pp_write_text_to_stream, but for GraphViz label output.
148 Flush the formatted text of pretty-printer PP onto the attached stream.
149 Replace characters in PP that have special meaning in a GraphViz .dot
file.
FOR_RECORD presumably selects the extra escaping needed for record-shape
nodes (see the comment about the pipe character below) - TODO confirm,
the code that reads it is not visible in this excerpt.
152 This routine is not very fast, but it doesn't have to be as this is only
153 used by routines dumping intermediate representations in graph form. */
156 pp_write_text_as_dot_label_to_stream (pretty_printer
*pp
, bool for_record
)
158 const char *text
= pp_formatted_text (pp
);
159 const char *p
= text
;
160 FILE *fp
= pp_buffer (pp
)->stream
;
166 /* Print newlines as a left-aligned newline. */
168 fputs ("\\l\\\n", fp
);
171 /* A pipe is only special for record-shape nodes. */
178 /* The following characters always have to be escaped
179 for use in labels. */
195 pp_clear_output_area (pp
);
198 /* Wrap a text delimited by START and END into PRETTY-PRINTER. */
// Visible outline: a word is found by advancing P from START until END, a
// blank or a newline is reached; the word START..P is emitted with
// pp_append_text. A comparison of the word length against
// pp_remaining_character_count_for_line (pp) guards an action that is not
// visible in this excerpt. Blanks and newlines found at START are then
// handled by branches whose bodies are likewise missing here.
200 pp_wrap_text (pretty_printer
*pp
, const char *start
, const char *end
)
202 bool wrapping_line
= pp_is_wrapping_line (pp
);
206 /* Dump anything bordered by whitespaces. */
208 const char *p
= start
;
209 while (p
!= end
&& !ISBLANK (*p
) && *p
!= '\n')
212 && p
- start
>= pp_remaining_character_count_for_line (pp
))
214 pp_append_text (pp
, start
, p
);
218 if (start
!= end
&& ISBLANK (*start
))
223 if (start
!= end
&& *start
== '\n')
231 /* Same as pp_wrap_text but wrap text only when in line-wrapping mode. */
// When pp_is_wrapping_line (pp) holds, START..END goes to pp_wrap_text.
// The pp_append_text call is presumably the else branch; the else line is
// missing from this excerpt.
233 pp_maybe_wrap_text (pretty_printer
*pp
, const char *start
, const char *end
)
235 if (pp_is_wrapping_line (pp
))
236 pp_wrap_text (pp
, start
, end
);
238 pp_append_text (pp
, start
, end
);
241 /* Append to the output area of PRETTY-PRINTER a string specified by its
242 STARTing character and LENGTH. This is a thin wrapper that forwards
START and LENGTH to output_buffer_append_r on pp_buffer (pp). */
244 pp_append_r (pretty_printer
*pp
, const char *start
, int length
)
246 output_buffer_append_r (pp_buffer (pp
), start
, length
);
249 /* Insert enough spaces into the output area of PRETTY-PRINTER to bring
250 the column position to the current indentation level, assuming that a
251 newline has just been written to the buffer. */
// The loop runs pp_indentation (pp) times; its body is not visible in
// this excerpt.
253 pp_indent (pretty_printer
*pp
)
255 int n
= pp_indentation (pp
);
258 for (i
= 0; i
< n
; ++i
)
262 /* The following format specifiers are recognized as being client independent:
263 %d, %i: (signed) integer in base ten.
264 %u: unsigned integer in base ten.
265 %o: unsigned integer in base eight.
266 %x: unsigned integer in base sixteen.
267 %ld, %li, %lo, %lu, %lx: long versions of the above.
268 %lld, %lli, %llo, %llu, %llx: long long versions.
269 %wd, %wi, %wo, %wu, %wx: HOST_WIDE_INT versions.
273 %r: if pp_show_color(pp), switch to color identified by const char *.
274 %R: if pp_show_color(pp), reset color.
275 %m: strerror(text->err_no) - does not consume a value from args_ptr.
279 %': apostrophe (should only be used in untranslated messages;
280 translations should use appropriate punctuation directly).
281 %.*s: a substring the length of which is specified by an argument
283 %Ns: likewise, but length specified as constant in the format string.
284 Flag 'q': quote formatted text (must come immediately after '%').
286 Arguments can be used sequentially, or through %N$ resp. *N$
287 notation, which selects the Nth argument after the format string. If %N$ / *N$
288 notation is used, it must be used for all arguments, except %m, %%,
289 %<, %> and %', which may not have a number, as they do not consume
290 an argument. When %M$.*N$s is used, M must be N + 1. (This may
291 also be written %M$.*s, provided N is not otherwise used.) The
292 format string must have conversion specifiers with argument numbers
293 1 up to highest argument; each argument may only be used once.
294 A format string can have at most 30 arguments. */
296 /* Formatting phases 1 and 2: render TEXT->format_spec plus
297 TEXT->args_ptr into a series of chunks in pp_buffer (PP)->args[].
298 Phase 3 is in pp_output_formatted_text. */
// PP supplies the output buffer and the client format decoder; TEXT
// supplies format_spec, args_ptr and err_no. A fresh chunk_info is
// allocated on buffer->chunk_obstack and pushed onto
// buffer->cur_chunk_array. While phase 2 runs, buffer->obstack points at
// chunk_obstack and verbatim wrapping is selected; before returning, the
// obstack pointer and wrapping mode are restored and buffer->line_length
// is reset to 0.
301 pp_format (pretty_printer
*pp
, text_info
*text
)
303 output_buffer
*buffer
= pp_buffer (pp
);
306 struct chunk_info
*new_chunk_array
;
308 unsigned int curarg
= 0, chunk
= 0, argno
;
309 pp_wrapping_mode_t old_wrapping_mode
;
310 bool any_unnumbered
= false, any_numbered
= false;
311 const char **formatters
[PP_NL_ARGMAX
];
313 /* Allocate a new chunk structure. */
314 new_chunk_array
= XOBNEW (&buffer
->chunk_obstack
, struct chunk_info
);
315 new_chunk_array
->prev
= buffer
->cur_chunk_array
;
316 buffer
->cur_chunk_array
= new_chunk_array
;
317 args
= new_chunk_array
->args
;
319 /* Formatting phase 1: split up TEXT->format_spec into chunks in
320 pp_buffer (PP)->args[]. Even-numbered chunks are to be output
321 verbatim, odd-numbered chunks are format specifiers.
322 %m, %%, %<, %>, and %' are replaced with the appropriate text at
325 memset (formatters
, 0, sizeof formatters
);
327 for (p
= text
->format_spec
; *p
; )
329 while (*p
!= '\0' && *p
!= '%')
331 obstack_1grow (&buffer
->chunk_obstack
, *p
);
344 obstack_1grow (&buffer
->chunk_obstack
, '%');
350 obstack_grow (&buffer
->chunk_obstack
,
351 open_quote
, strlen (open_quote
));
353 = colorize_start (pp_show_color (pp
), "quote");
354 obstack_grow (&buffer
->chunk_obstack
, colorstr
, strlen (colorstr
));
361 const char *colorstr
= colorize_stop (pp_show_color (pp
));
362 obstack_grow (&buffer
->chunk_obstack
, colorstr
, strlen (colorstr
));
366 obstack_grow (&buffer
->chunk_obstack
,
367 close_quote
, strlen (close_quote
));
373 const char *colorstr
= colorize_stop (pp_show_color (pp
));
374 obstack_grow (&buffer
->chunk_obstack
, colorstr
,
382 const char *errstr
= xstrerror (text
->err_no
);
383 obstack_grow (&buffer
->chunk_obstack
, errstr
, strlen (errstr
));
389 /* Handled in phase 2. Terminate the plain chunk here. */
390 obstack_1grow (&buffer
->chunk_obstack
, '\0');
391 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
392 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
399 argno
= strtoul (p
, &end
, 10) - 1;
401 gcc_assert (*p
== '$');
405 gcc_assert (!any_unnumbered
);
410 any_unnumbered
= true;
411 gcc_assert (!any_numbered
);
413 gcc_assert (argno
< PP_NL_ARGMAX
);
414 gcc_assert (!formatters
[argno
]);
415 formatters
[argno
] = &args
[chunk
];
418 obstack_1grow (&buffer
->chunk_obstack
, *p
);
421 while (strchr ("qwl+#", p
[-1]));
425 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
426 (where M == N + 1). */
431 obstack_1grow (&buffer
->chunk_obstack
, *p
);
434 while (ISDIGIT (p
[-1]));
435 gcc_assert (p
[-1] == 's');
439 gcc_assert (*p
== '*');
440 obstack_1grow (&buffer
->chunk_obstack
, '*');
446 unsigned int argno2
= strtoul (p
, &end
, 10) - 1;
448 gcc_assert (argno2
== argno
- 1);
449 gcc_assert (!any_unnumbered
);
450 gcc_assert (*p
== '$');
453 formatters
[argno2
] = formatters
[argno
];
457 gcc_assert (!any_numbered
);
458 formatters
[argno
+1] = formatters
[argno
];
461 gcc_assert (*p
== 's');
462 obstack_1grow (&buffer
->chunk_obstack
, 's');
469 obstack_1grow (&buffer
->chunk_obstack
, '\0');
470 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
471 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
474 obstack_1grow (&buffer
->chunk_obstack
, '\0');
475 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
476 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
479 /* Set output to the argument obstack, and switch line-wrapping and
481 buffer
->obstack
= &buffer
->chunk_obstack
;
482 old_wrapping_mode
= pp_set_verbatim_wrapping (pp
);
484 /* Second phase. Replace each formatter with the formatted text it
487 for (argno
= 0; formatters
[argno
]; argno
++)
495 /* We do not attempt to enforce any ordering on the modifier
498 for (p
= *formatters
[argno
];; p
++)
523 /* We don't support precision beyond that of "long long". */
524 gcc_assert (precision
< 2);
531 gcc_assert (!wide
|| precision
== 0);
535 pp_string (pp
, open_quote
);
536 pp_string (pp
, colorize_start (pp_show_color (pp
), "quote"));
542 pp_string (pp
, colorize_start (pp_show_color (pp
),
543 va_arg (*text
->args_ptr
,
548 pp_character (pp
, va_arg (*text
->args_ptr
, int));
554 pp_wide_integer (pp
, va_arg (*text
->args_ptr
, HOST_WIDE_INT
));
556 pp_integer_with_precision
557 (pp
, *text
->args_ptr
, precision
, int, "d");
562 pp_scalar (pp
, "%" HOST_WIDE_INT_PRINT
"o",
563 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
565 pp_integer_with_precision
566 (pp
, *text
->args_ptr
, precision
, unsigned, "o");
570 pp_string (pp
, va_arg (*text
->args_ptr
, const char *));
574 pp_pointer (pp
, va_arg (*text
->args_ptr
, void *));
579 pp_scalar (pp
, HOST_WIDE_INT_PRINT_UNSIGNED
,
580 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
582 pp_integer_with_precision
583 (pp
, *text
->args_ptr
, precision
, unsigned, "u");
588 pp_scalar (pp
, HOST_WIDE_INT_PRINT_HEX
,
589 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
591 pp_integer_with_precision
592 (pp
, *text
->args_ptr
, precision
, unsigned, "x");
600 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
601 (where M == N + 1). The format string should be verified
602 already from the first phase. */
607 n
= strtoul (p
, &end
, 10);
609 gcc_assert (*p
== 's');
613 gcc_assert (*p
== '*');
615 gcc_assert (*p
== 's');
616 n
= va_arg (*text
->args_ptr
, int);
618 /* This consumes a second entry in the formatters array. */
619 gcc_assert (formatters
[argno
] == formatters
[argno
+1]);
623 s
= va_arg (*text
->args_ptr
, const char *);
624 pp_append_text (pp
, s
, s
+ n
);
632 gcc_assert (pp_format_decoder (pp
));
633 ok
= pp_format_decoder (pp
) (pp
, text
, p
,
634 precision
, wide
, plus
, hash
);
641 pp_string (pp
, colorize_stop (pp_show_color (pp
)));
642 pp_string (pp
, close_quote
);
645 obstack_1grow (&buffer
->chunk_obstack
, '\0');
646 *formatters
[argno
] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
// Every formatter slot past the last one processed must be unused.
650 for (; argno
< PP_NL_ARGMAX
; argno
++)
651 gcc_assert (!formatters
[argno
]);
653 /* Revert to normal obstack and wrapping mode. */
654 buffer
->obstack
= &buffer
->formatted_obstack
;
655 buffer
->line_length
= 0;
656 pp_wrapping_mode (pp
) = old_wrapping_mode
;
660 /* Output the chunks prepared by pp_format: walk the current chunk array
of PP's buffer, print each chunk with pp_string, then pop that array
and free it from the chunk obstack. On entry the buffer must be using
formatted_obstack and have a line_length of 0 (both asserted). */
662 pp_output_formatted_text (pretty_printer
*pp
)
665 output_buffer
*buffer
= pp_buffer (pp
);
666 struct chunk_info
*chunk_array
= buffer
->cur_chunk_array
;
667 const char **args
= chunk_array
->args
;
669 gcc_assert (buffer
->obstack
== &buffer
->formatted_obstack
);
670 gcc_assert (buffer
->line_length
== 0);
672 /* This is a third phase, first 2 phases done in pp_format.
673 Now we actually print it. */
674 for (chunk
= 0; args
[chunk
]; chunk
++)
675 pp_string (pp
, args
[chunk
]);
677 /* Deallocate the chunk structure and everything after it (i.e. the
678 associated series of formatted strings). */
679 buffer
->cur_chunk_array
= chunk_array
->prev
;
680 obstack_free (&buffer
->chunk_obstack
, chunk_array
);
683 /* Helper subroutine of output_verbatim and verbatim. Do the appropriate
684 settings needed by BUFFER for a verbatim formatting. */
// The wrapping mode in effect on entry is saved by
// pp_set_verbatim_wrapping and restored after the
// pp_format / pp_output_formatted_text pair has run.
686 pp_format_verbatim (pretty_printer
*pp
, text_info
*text
)
688 /* Set verbatim mode. */
689 pp_wrapping_mode_t oldmode
= pp_set_verbatim_wrapping (pp
);
691 /* Do the actual formatting. */
692 pp_format (pp
, text
);
693 pp_output_formatted_text (pp
);
695 /* Restore previous settings. */
696 pp_wrapping_mode (pp
) = oldmode
;
699 /* Flush the content of BUFFER onto the attached stream. This
700 function does nothing unless pp->buffer->flush_p. */
// When flushing is enabled, the text is written with
// pp_write_text_to_stream and the stream is then flushed with fflush.
702 pp_flush (pretty_printer
*pp
)
705 if (!pp
->buffer
->flush_p
)
707 pp_write_text_to_stream (pp
);
708 fflush (pp_buffer (pp
)->stream
);
711 /* Flush the content of BUFFER onto the attached stream independently
712 of the value of pp->buffer->flush_p. */
// Same two steps as pp_flush without the flush_p test:
// pp_write_text_to_stream followed by fflush on the stream.
714 pp_really_flush (pretty_printer
*pp
)
717 pp_write_text_to_stream (pp
);
718 fflush (pp_buffer (pp
)->stream
);
// The visible code stores LENGTH as the line cutoff of PP and then calls
// pp_set_real_maximum_length to recompute the effective maximum length.
// NOTE(review): the tail of the comment below is missing from this excerpt.
721 /* Sets the number of maximum characters per line PRETTY-PRINTER can
722 output in line-wrapping mode. A LENGTH value 0 suppresses
725 pp_set_line_maximum_length (pretty_printer
*pp
, int length
)
727 pp_line_cutoff (pp
) = length
;
728 pp_set_real_maximum_length (pp
);
731 /* Clear PRETTY-PRINTER output area text info. */
// obstack_free is called on the current obstack of the buffer with that
// obstack's obstack_base as the object, and line_length is reset to 0.
733 pp_clear_output_area (pretty_printer
*pp
)
735 obstack_free (pp_buffer (pp
)->obstack
,
736 obstack_base (pp_buffer (pp
)->obstack
));
737 pp_buffer (pp
)->line_length
= 0;
740 /* Set PREFIX for PRETTY-PRINTER. */
// Visible effects: the real maximum line length is recomputed, the
// emitted_prefix flag is cleared and the indentation is reset to 0.
// NOTE(review): the statement that stores PREFIX into PP is presumably on
// a line missing from this excerpt.
742 pp_set_prefix (pretty_printer
*pp
, const char *prefix
)
745 pp_set_real_maximum_length (pp
);
746 pp
->emitted_prefix
= false;
747 pp_indentation (pp
) = 0;
750 /* Free PRETTY-PRINTER's prefix, a previously malloc()'d string. */
// Nothing is freed when pp->prefix is NULL. The pointer is passed to free
// through CONST_CAST (char *, ...) because the member is a const string.
752 pp_destroy_prefix (pretty_printer
*pp
)
754 if (pp
->prefix
!= NULL
)
756 free (CONST_CAST (char *, pp
->prefix
));
761 /* Write out PRETTY-PRINTER's prefix. */
// Nothing happens unless pp->prefix is non-NULL. The switch on the
// prefixing rule has visible cases for DIAGNOSTICS_SHOW_PREFIX_NEVER,
// DIAGNOSTICS_SHOW_PREFIX_ONCE and DIAGNOSTICS_SHOW_PREFIX_EVERY_LINE.
// The ONCE case tests emitted_prefix and contains an increment of the
// indentation by 3; the EVERY_LINE case appends the whole prefix with
// pp_append_r and sets emitted_prefix.
// NOTE(review): break and fall-through lines are missing from this
// excerpt, so the exact flow between the cases cannot be confirmed here.
763 pp_emit_prefix (pretty_printer
*pp
)
765 if (pp
->prefix
!= NULL
)
767 switch (pp_prefixing_rule (pp
))
770 case DIAGNOSTICS_SHOW_PREFIX_NEVER
:
773 case DIAGNOSTICS_SHOW_PREFIX_ONCE
:
774 if (pp
->emitted_prefix
)
779 pp_indentation (pp
) += 3;
782 case DIAGNOSTICS_SHOW_PREFIX_EVERY_LINE
:
784 int prefix_length
= strlen (pp
->prefix
);
785 pp_append_r (pp
, pp
->prefix
, prefix_length
);
786 pp
->emitted_prefix
= true;
793 /* Construct a PRETTY-PRINTER with PREFIX and of MAXIMUM_LENGTH
794 characters per line. */
// P is the prefix and L the line cutoff. The buffer member is created by
// placement-new of an output_buffer into storage obtained with XCNEW.
// The body stores L as the line cutoff, selects
// DIAGNOSTICS_SHOW_PREFIX_ONCE and installs P through pp_set_prefix.
// Most other member initializers are not visible in this excerpt.
796 pretty_printer::pretty_printer (const char *p
, int l
)
797 : buffer (new (XCNEW (output_buffer
)) output_buffer ()),
806 translate_identifiers (true),
809 pp_line_cutoff (this) = l
;
810 /* By default, we emit prefixes once per message. */
811 pp_prefixing_rule (this) = DIAGNOSTICS_SHOW_PREFIX_ONCE
;
812 pp_set_prefix (this, p
);
// Run the output_buffer destructor explicitly; the object was created
// with placement-new in the constructor.
// NOTE(review): any release of the underlying storage is not visible in
// this excerpt.
815 pretty_printer::~pretty_printer ()
817 buffer
->~output_buffer ();
// START..END is appended with pp_append_r. When the current line is empty
// (line_length == 0) and PP is wrapping lines, leading space characters
// at START are skipped first. The prefix emission that the comments
// mention is not visible in this excerpt.
821 /* Append a string delimited by START and END to the output area of
822 PRETTY-PRINTER. No line wrapping is done. However, if beginning a
823 new line then emit PRETTY-PRINTER's prefix and skip any leading
824 whitespace if appropriate. The caller must ensure that it is
827 pp_append_text (pretty_printer
*pp
, const char *start
, const char *end
)
829 /* Emit prefix and skip whitespace if we're starting a new line. */
830 if (pp_buffer (pp
)->line_length
== 0)
833 if (pp_is_wrapping_line (pp
))
834 while (start
!= end
&& *start
== ' ')
837 pp_append_r (pp
, start
, end
- start
);
840 /* Finishes constructing a NULL-terminated character string representing
841 the PRETTY-PRINTED text. Thin wrapper that returns the result of
output_buffer_formatted_text on pp_buffer (pp). */
843 pp_formatted_text (pretty_printer
*pp
)
845 return output_buffer_formatted_text (pp_buffer (pp
));
848 /* Return a pointer to the last character emitted in PRETTY-PRINTER's
849 output area. A NULL pointer means no character available. Thin
wrapper over output_buffer_last_position_in_text on pp_buffer (pp). */
851 pp_last_position_in_text (const pretty_printer
*pp
)
853 return output_buffer_last_position_in_text (pp_buffer (pp
));
856 /* Return the amount of characters PRETTY-PRINTER can accept to
857 make a full line. Meaningful only in line-wrapping mode.
Computed as pp->maximum_length minus the current line_length of the
buffer; pp_character treats a result <= 0 as a full line. */
859 pp_remaining_character_count_for_line (pretty_printer
*pp
)
861 return pp
->maximum_length
- pp_buffer (pp
)->line_length
;
865 /* Format a message into BUFFER a la printf. */
// MSG becomes text.format_spec; the message is then rendered with
// pp_format followed by pp_output_formatted_text. The va_list handling
// and the rest of the text_info setup are not visible in this excerpt.
867 pp_printf (pretty_printer
*pp
, const char *msg
, ...)
875 text
.format_spec
= msg
;
876 pp_format (pp
, &text
);
877 pp_output_formatted_text (pp
);
882 /* Output MESSAGE verbatim into BUFFER. */
// MSG becomes text.format_spec and is rendered through
// pp_format_verbatim. The va_list handling and the rest of the text_info
// setup are not visible in this excerpt.
884 pp_verbatim (pretty_printer
*pp
, const char *msg
, ...)
892 text
.format_spec
= msg
;
893 pp_format_verbatim (pp
, &text
);
899 /* Have PRETTY-PRINTER start a new line. */
// A newline character is appended to the current obstack, the
// pp_needs_newline flag is cleared and line_length is reset to 0.
901 pp_newline (pretty_printer
*pp
)
903 obstack_1grow (pp_buffer (pp
)->obstack
, '\n');
904 pp_needs_newline (pp
) = false;
905 pp_buffer (pp
)->line_length
= 0;
908 /* Have PRETTY-PRINTER add a CHARACTER. */
// C is appended to the current obstack and line_length is incremented.
// Before that, when PP is wrapping lines and
// pp_remaining_character_count_for_line (pp) is <= 0, a branch is taken
// whose body is not visible in this excerpt.
910 pp_character (pretty_printer
*pp
, int c
)
912 if (pp_is_wrapping_line (pp
)
913 && pp_remaining_character_count_for_line (pp
) <= 0)
919 obstack_1grow (pp_buffer (pp
)->obstack
, c
);
920 ++pp_buffer (pp
)->line_length
;
923 /* Append a STRING to the output area of PRETTY-PRINTER; the STRING may
924 be line-wrapped if in appropriate mode. STR is asserted to be
non-null with gcc_checking_assert; the range STR .. STR + strlen (STR)
is then handed to pp_maybe_wrap_text. */
926 pp_string (pretty_printer
*pp
, const char *str
)
928 gcc_checking_assert (str
);
929 pp_maybe_wrap_text (pp
, str
, str
+ strlen (str
));
932 /* Maybe print out a whitespace if needed. */
// When pp->padding differs from pp_none, padding is reset to pp_none.
// The emission of the space itself is not visible in this excerpt.
935 pp_maybe_space (pretty_printer
*pp
)
937 if (pp
->padding
!= pp_none
)
940 pp
->padding
= pp_none
;
944 // Add a newline to the pretty printer PP and flush formatted text.
// Only the final clearing of pp_needs_newline is visible in this excerpt;
// the newline and flush calls themselves are on missing lines.
947 pp_newline_and_flush (pretty_printer
*pp
)
951 pp_needs_newline (pp
) = false;
954 // Add a newline to the pretty printer PP, followed by indentation.
// N is added to the current indentation of PP (it is an increment, not an
// absolute level) and pp_needs_newline is cleared. The newline and indent
// calls themselves are not visible in this excerpt.
957 pp_newline_and_indent (pretty_printer
*pp
, int n
)
959 pp_indentation (pp
) += n
;
962 pp_needs_newline (pp
) = false;
965 // Add separator C, followed by a single whitespace.
// C is emitted with pp_character; the emission of the trailing space is
// not visible in this excerpt.
968 pp_separate_with (pretty_printer
*pp
, char c
)
970 pp_character (pp
, c
);
975 /* The string starting at P has LEN (at least 1) bytes left; if they
976 start with a valid UTF-8 sequence, return the length of that
977 sequence and set *VALUE to the value of that sequence, and
978 otherwise return 0 and set *VALUE to (unsigned int) -1. */
// Visible checks: a multi-byte sequence is rejected when its length
// (apparently derived from the leading set bits of *P) exceeds LEN or is
// outside 2..6; when any continuation byte fails (u & 0xC0) == 0x80; and
// when the decoded value is small enough to fit a shorter sequence or
// lies in the surrogate range 0xD800..0xDFFF. Each rejection stores
// (unsigned int) -1 in *VALUE. The single-byte path and the success
// return are not visible in this excerpt.
981 decode_utf8_char (const unsigned char *p
, size_t len
, unsigned int *value
)
992 for (t
= *p
; t
& 0x80; t
<<= 1)
995 if (utf8_len
> len
|| utf8_len
< 2 || utf8_len
> 6)
997 *value
= (unsigned int) -1;
1000 ch
= *p
& ((1 << (7 - utf8_len
)) - 1);
1001 for (i
= 1; i
< utf8_len
; i
++)
1003 unsigned int u
= p
[i
];
1004 if ((u
& 0xC0) != 0x80)
1006 *value
= (unsigned int) -1;
1009 ch
= (ch
<< 6) | (u
& 0x3F);
1011 if ( (ch
<= 0x7F && utf8_len
> 1)
1012 || (ch
<= 0x7FF && utf8_len
> 2)
1013 || (ch
<= 0xFFFF && utf8_len
> 3)
1014 || (ch
<= 0x1FFFFF && utf8_len
> 4)
1015 || (ch
<= 0x3FFFFFF && utf8_len
> 5)
1016 || (ch
>= 0xD800 && ch
<= 0xDFFF))
1018 *value
= (unsigned int) -1;
// Two replaceable function pointers: identifier_to_locale_alloc takes a
// size_t and defaults to xmalloc; identifier_to_locale_free takes a
// void * and defaults to free.
// NOTE(review): the tail of the comment below is missing from this excerpt.
1031 /* Allocator for identifier_to_locale and corresponding function to
1034 void *(*identifier_to_locale_alloc
) (size_t) = xmalloc
;
1035 void (*identifier_to_locale_free
) (void *) = free
;
1037 /* Given IDENT, an identifier in the internal encoding, return a
1038 version of IDENT suitable for diagnostics in the locale character
1039 set: either IDENT itself, or a string, allocated using
1040 identifier_to_locale_alloc, converted to the locale character set
1041 and using escape sequences if not representable in the locale
1042 character set or containing control characters or invalid byte
1043 sequences. Existing backslashes in IDENT are not doubled, so the
1044 result may not uniquely specify the contents of an arbitrary byte
1045 sequence identifier. */
1048 identifier_to_locale (const char *ident
)
1050 const unsigned char *uid
= (const unsigned char *) ident
;
1051 size_t idlen
= strlen (ident
);
1052 bool valid_printable_utf8
= true;
1053 bool all_ascii
= true;
1056 for (i
= 0; i
< idlen
;)
1059 size_t utf8_len
= decode_utf8_char (&uid
[i
], idlen
- i
, &c
);
1060 if (utf8_len
== 0 || c
<= 0x1F || (c
>= 0x7F && c
<= 0x9F))
1062 valid_printable_utf8
= false;
1070 /* If IDENT contains invalid UTF-8 sequences (which may occur with
1071 attributes putting arbitrary byte sequences in identifiers), or
1072 control characters, we use octal escape sequences for all bytes
1073 outside printable ASCII. */
1074 if (!valid_printable_utf8
)
1076 char *ret
= (char *) identifier_to_locale_alloc (4 * idlen
+ 1);
1078 for (i
= 0; i
< idlen
; i
++)
1080 if (uid
[i
] > 0x1F && uid
[i
] < 0x7F)
1084 sprintf (p
, "\\%03o", uid
[i
]);
1092 /* Otherwise, if it is valid printable ASCII, or printable UTF-8
1093 with the locale character set being UTF-8, IDENT is used. */
1094 if (all_ascii
|| locale_utf8
)
1097 /* Otherwise IDENT is converted to the locale character set if
1099 #if defined ENABLE_NLS && defined HAVE_LANGINFO_CODESET && HAVE_ICONV
1100 if (locale_encoding
!= NULL
)
1102 iconv_t cd
= iconv_open (locale_encoding
, "UTF-8");
1103 bool conversion_ok
= true;
1105 if (cd
!= (iconv_t
) -1)
1107 size_t ret_alloc
= 4 * idlen
+ 1;
1110 /* Repeat the whole conversion process as needed with
1111 larger buffers so non-reversible transformations can
1112 always be detected. */
1113 ICONV_CONST
char *inbuf
= CONST_CAST (char *, ident
);
1115 size_t inbytesleft
= idlen
;
1116 size_t outbytesleft
= ret_alloc
- 1;
1119 ret
= (char *) identifier_to_locale_alloc (ret_alloc
);
1122 if (iconv (cd
, 0, 0, 0, 0) == (size_t) -1)
1124 conversion_ok
= false;
1128 iconv_ret
= iconv (cd
, &inbuf
, &inbytesleft
,
1129 &outbuf
, &outbytesleft
);
1130 if (iconv_ret
== (size_t) -1 || inbytesleft
!= 0)
1135 identifier_to_locale_free (ret
);
1141 conversion_ok
= false;
1145 else if (iconv_ret
!= 0)
1147 conversion_ok
= false;
1150 /* Return to initial shift state. */
1151 if (iconv (cd
, 0, 0, &outbuf
, &outbytesleft
) == (size_t) -1)
1156 identifier_to_locale_free (ret
);
1162 conversion_ok
= false;
1176 /* Otherwise, convert non-ASCII characters in IDENT to UCNs. */
1178 char *ret
= (char *) identifier_to_locale_alloc (10 * idlen
+ 1);
1180 for (i
= 0; i
< idlen
;)
1183 size_t utf8_len
= decode_utf8_char (&uid
[i
], idlen
- i
, &c
);
1188 sprintf (p
, "\\U%08x", c
);