1 /* Various declarations for language-independent pretty-print subroutines.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Gabriel Dos Reis <gdr@integrable-solutions.net>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
25 #include "pretty-print.h"
26 #include "diagnostic-color.h"
32 /* A pointer to the formatted diagnostic message. */
33 #define pp_formatted_text_data(PP) \
34 ((const char *) obstack_base (pp_base (PP)->buffer->obstack))
36 /* Format an integer given by va_arg (ARG, type-specifier T) where
37 type-specifier is a precision modifier as indicated by PREC. F is
38 a string used to construct the appropriate format-specifier. */
39 #define pp_integer_with_precision(PP, ARG, PREC, T, F) \
44 pp_scalar (PP, "%" F, va_arg (ARG, T)); \
48 pp_scalar (PP, "%l" F, va_arg (ARG, long T)); \
52 pp_scalar (PP, "%" HOST_LONG_LONG_FORMAT F, va_arg (ARG, long long T)); \
61 /* Subroutine of pp_base_set_line_maximum_length and pp_base_set_prefix.
62 Set up PRETTY-PRINTER's internal maximum number of characters per line. */
64 pp_set_real_maximum_length (pretty_printer
*pp
)
66 /* If we're told not to wrap lines then do the obvious thing. In case
67 we'll emit prefix only once per message, it is appropriate
68 not to increase unnecessarily the line-length cut-off. */
69 if (!pp_is_wrapping_line (pp
)
70 || pp_prefixing_rule (pp
) == DIAGNOSTICS_SHOW_PREFIX_ONCE
71 || pp_prefixing_rule (pp
) == DIAGNOSTICS_SHOW_PREFIX_NEVER
)
72 pp
->maximum_length
= pp_line_cutoff (pp
);
75 int prefix_length
= pp
->prefix
? strlen (pp
->prefix
) : 0;
76 /* If the prefix is ridiculously too long, output at least
78 if (pp_line_cutoff (pp
) - prefix_length
< 32)
79 pp
->maximum_length
= pp_line_cutoff (pp
) + 32;
81 pp
->maximum_length
= pp_line_cutoff (pp
);
85 /* Clear PRETTY-PRINTER's output state. */
87 pp_clear_state (pretty_printer
*pp
)
89 pp
->emitted_prefix
= false;
90 pp_indentation (pp
) = 0;
93 /* Flush the formatted text of PRETTY-PRINTER onto the attached stream. */
95 pp_write_text_to_stream (pretty_printer
*pp
)
97 const char *text
= pp_formatted_text (pp
);
98 fputs (text
, pp
->buffer
->stream
);
99 pp_clear_output_area (pp
);
102 /* As pp_write_text_to_stream, but for GraphViz label output.
104 Flush the formatted text of pretty-printer PP onto the attached stream.
105 Replace characters in the formatted text that have special meaning in a GraphViz .dot
108 This routine is not very fast, but it doesn't have to be, as it is only
109 used by routines dumping intermediate representations in graph form. */
112 pp_write_text_as_dot_label_to_stream (pretty_printer
*pp
, bool for_record
)
114 const char *text
= pp_formatted_text (pp
);
115 const char *p
= text
;
116 FILE *fp
= pp
->buffer
->stream
;
122 /* Print newlines as a left-aligned newline. */
124 fputs ("\\l\\\n", fp
);
127 /* A pipe is only special for record-shape nodes. */
134 /* The following characters always have to be escaped
135 for use in labels. */
151 pp_clear_output_area (pp
);
154 /* Wrap a text delimited by START and END into PRETTY-PRINTER. */
156 pp_wrap_text (pretty_printer
*pp
, const char *start
, const char *end
)
158 bool wrapping_line
= pp_is_wrapping_line (pp
);
162 /* Dump anything bordered by whitespaces. */
164 const char *p
= start
;
165 while (p
!= end
&& !ISBLANK (*p
) && *p
!= '\n')
168 && p
- start
>= pp_remaining_character_count_for_line (pp
))
170 pp_append_text (pp
, start
, p
);
174 if (start
!= end
&& ISBLANK (*start
))
179 if (start
!= end
&& *start
== '\n')
187 /* Same as pp_wrap_text but wrap text only when in line-wrapping mode. */
189 pp_maybe_wrap_text (pretty_printer
*pp
, const char *start
, const char *end
)
191 if (pp_is_wrapping_line (pp
))
192 pp_wrap_text (pp
, start
, end
);
194 pp_append_text (pp
, start
, end
);
197 /* Append to the output area of PRETTY-PRINTER a string specified by its
198 STARTing character and LENGTH. */
200 pp_append_r (pretty_printer
*pp
, const char *start
, int length
)
202 obstack_grow (pp
->buffer
->obstack
, start
, length
);
203 pp
->buffer
->line_length
+= length
;
206 /* Insert enough spaces into the output area of PRETTY-PRINTER to bring
207 the column position to the current indentation level, assuming that a
208 newline has just been written to the buffer. */
210 pp_base_indent (pretty_printer
*pp
)
212 int n
= pp_indentation (pp
);
215 for (i
= 0; i
< n
; ++i
)
219 /* The following format specifiers are recognized as being client independent:
220 %d, %i: (signed) integer in base ten.
221 %u: unsigned integer in base ten.
222 %o: unsigned integer in base eight.
223 %x: unsigned integer in base sixteen.
224 %ld, %li, %lo, %lu, %lx: long versions of the above.
225 %lld, %lli, %llo, %llu, %llx: long long versions.
226 %wd, %wi, %wo, %wu, %wx: HOST_WIDE_INT versions.
230 %r: if pp_show_color(pp), switch to color identified by const char *.
231 %R: if pp_show_color(pp), reset color.
232 %m: strerror(text->err_no) - does not consume a value from args_ptr.
236 %': apostrophe (should only be used in untranslated messages;
237 translations should use appropriate punctuation directly).
238 %.*s: a substring the length of which is specified by an argument
240 %Ns: likewise, but length specified as constant in the format string.
241 Flag 'q': quote formatted text (must come immediately after '%').
243 Arguments can be used sequentially, or through the %N$ resp. *N$
244 notation, which selects the Nth argument after the format string. If %N$ / *N$
245 notation is used, it must be used for all arguments, except %m, %%,
246 %<, %> and %', which may not have a number, as they do not consume
247 an argument. When %M$.*N$s is used, M must be N + 1. (This may
248 also be written %M$.*s, provided N is not otherwise used.) The
249 format string must have conversion specifiers with argument numbers
250 1 up to highest argument; each argument may only be used once.
251 A format string can have at most 30 arguments. */
253 /* Formatting phases 1 and 2: render TEXT->format_spec plus
254 TEXT->args_ptr into a series of chunks in PP->buffer->args[].
255 Phase 3 is in pp_base_output_formatted_text. */
258 pp_base_format (pretty_printer
*pp
, text_info
*text
)
260 output_buffer
*buffer
= pp
->buffer
;
263 struct chunk_info
*new_chunk_array
;
265 unsigned int curarg
= 0, chunk
= 0, argno
;
266 pp_wrapping_mode_t old_wrapping_mode
;
267 bool any_unnumbered
= false, any_numbered
= false;
268 const char **formatters
[PP_NL_ARGMAX
];
270 /* Allocate a new chunk structure. */
271 new_chunk_array
= XOBNEW (&buffer
->chunk_obstack
, struct chunk_info
);
272 new_chunk_array
->prev
= buffer
->cur_chunk_array
;
273 buffer
->cur_chunk_array
= new_chunk_array
;
274 args
= new_chunk_array
->args
;
276 /* Formatting phase 1: split up TEXT->format_spec into chunks in
277 PP->buffer->args[]. Even-numbered chunks are to be output
278 verbatim, odd-numbered chunks are format specifiers.
279 %m, %%, %<, %>, and %' are replaced with the appropriate text at
282 memset (formatters
, 0, sizeof formatters
);
284 for (p
= text
->format_spec
; *p
; )
286 while (*p
!= '\0' && *p
!= '%')
288 obstack_1grow (&buffer
->chunk_obstack
, *p
);
301 obstack_1grow (&buffer
->chunk_obstack
, '%');
307 obstack_grow (&buffer
->chunk_obstack
,
308 open_quote
, strlen (open_quote
));
310 = colorize_start (pp_show_color (pp
), "quote");
311 obstack_grow (&buffer
->chunk_obstack
, colorstr
, strlen (colorstr
));
318 const char *colorstr
= colorize_stop (pp_show_color (pp
));
319 obstack_grow (&buffer
->chunk_obstack
, colorstr
, strlen (colorstr
));
323 obstack_grow (&buffer
->chunk_obstack
,
324 close_quote
, strlen (close_quote
));
330 const char *colorstr
= colorize_stop (pp_show_color (pp
));
331 obstack_grow (&buffer
->chunk_obstack
, colorstr
,
339 const char *errstr
= xstrerror (text
->err_no
);
340 obstack_grow (&buffer
->chunk_obstack
, errstr
, strlen (errstr
));
346 /* Handled in phase 2. Terminate the plain chunk here. */
347 obstack_1grow (&buffer
->chunk_obstack
, '\0');
348 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
349 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
356 argno
= strtoul (p
, &end
, 10) - 1;
358 gcc_assert (*p
== '$');
362 gcc_assert (!any_unnumbered
);
367 any_unnumbered
= true;
368 gcc_assert (!any_numbered
);
370 gcc_assert (argno
< PP_NL_ARGMAX
);
371 gcc_assert (!formatters
[argno
]);
372 formatters
[argno
] = &args
[chunk
];
375 obstack_1grow (&buffer
->chunk_obstack
, *p
);
378 while (strchr ("qwl+#", p
[-1]));
382 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
383 (where M == N + 1). */
388 obstack_1grow (&buffer
->chunk_obstack
, *p
);
391 while (ISDIGIT (p
[-1]));
392 gcc_assert (p
[-1] == 's');
396 gcc_assert (*p
== '*');
397 obstack_1grow (&buffer
->chunk_obstack
, '*');
403 unsigned int argno2
= strtoul (p
, &end
, 10) - 1;
405 gcc_assert (argno2
== argno
- 1);
406 gcc_assert (!any_unnumbered
);
407 gcc_assert (*p
== '$');
410 formatters
[argno2
] = formatters
[argno
];
414 gcc_assert (!any_numbered
);
415 formatters
[argno
+1] = formatters
[argno
];
418 gcc_assert (*p
== 's');
419 obstack_1grow (&buffer
->chunk_obstack
, 's');
426 obstack_1grow (&buffer
->chunk_obstack
, '\0');
427 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
428 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
431 obstack_1grow (&buffer
->chunk_obstack
, '\0');
432 gcc_assert (chunk
< PP_NL_ARGMAX
* 2);
433 args
[chunk
++] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
436 /* Set output to the argument obstack, and switch line-wrapping and
438 buffer
->obstack
= &buffer
->chunk_obstack
;
439 old_wrapping_mode
= pp_set_verbatim_wrapping (pp
);
441 /* Second phase. Replace each formatter with the formatted text it
444 for (argno
= 0; formatters
[argno
]; argno
++)
452 /* We do not attempt to enforce any ordering on the modifier
455 for (p
= *formatters
[argno
];; p
++)
480 /* We don't support precision beyond that of "long long". */
481 gcc_assert (precision
< 2);
488 gcc_assert (!wide
|| precision
== 0);
492 pp_string (pp
, open_quote
);
493 pp_string (pp
, colorize_start (pp_show_color (pp
), "quote"));
499 pp_string (pp
, colorize_start (pp_show_color (pp
),
500 va_arg (*text
->args_ptr
,
505 pp_character (pp
, va_arg (*text
->args_ptr
, int));
511 pp_wide_integer (pp
, va_arg (*text
->args_ptr
, HOST_WIDE_INT
));
513 pp_integer_with_precision
514 (pp
, *text
->args_ptr
, precision
, int, "d");
519 pp_scalar (pp
, "%" HOST_WIDE_INT_PRINT
"o",
520 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
522 pp_integer_with_precision
523 (pp
, *text
->args_ptr
, precision
, unsigned, "o");
527 pp_string (pp
, va_arg (*text
->args_ptr
, const char *));
531 pp_pointer (pp
, va_arg (*text
->args_ptr
, void *));
536 pp_scalar (pp
, HOST_WIDE_INT_PRINT_UNSIGNED
,
537 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
539 pp_integer_with_precision
540 (pp
, *text
->args_ptr
, precision
, unsigned, "u");
545 pp_scalar (pp
, HOST_WIDE_INT_PRINT_HEX
,
546 va_arg (*text
->args_ptr
, unsigned HOST_WIDE_INT
));
548 pp_integer_with_precision
549 (pp
, *text
->args_ptr
, precision
, unsigned, "x");
557 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
558 (where M == N + 1). The format string should be verified
559 already from the first phase. */
564 n
= strtoul (p
, &end
, 10);
566 gcc_assert (*p
== 's');
570 gcc_assert (*p
== '*');
572 gcc_assert (*p
== 's');
573 n
= va_arg (*text
->args_ptr
, int);
575 /* This consumes a second entry in the formatters array. */
576 gcc_assert (formatters
[argno
] == formatters
[argno
+1]);
580 s
= va_arg (*text
->args_ptr
, const char *);
581 pp_append_text (pp
, s
, s
+ n
);
589 gcc_assert (pp_format_decoder (pp
));
590 ok
= pp_format_decoder (pp
) (pp
, text
, p
,
591 precision
, wide
, plus
, hash
);
598 pp_string (pp
, colorize_stop (pp_show_color (pp
)));
599 pp_string (pp
, close_quote
);
602 obstack_1grow (&buffer
->chunk_obstack
, '\0');
603 *formatters
[argno
] = XOBFINISH (&buffer
->chunk_obstack
, const char *);
606 #ifdef ENABLE_CHECKING
607 for (; argno
< PP_NL_ARGMAX
; argno
++)
608 gcc_assert (!formatters
[argno
]);
611 /* Revert to normal obstack and wrapping mode. */
612 buffer
->obstack
= &buffer
->formatted_obstack
;
613 buffer
->line_length
= 0;
614 pp_wrapping_mode (pp
) = old_wrapping_mode
;
618 /* Output the chunks previously formatted into PP's buffer, then free the chunk structure. */
620 pp_base_output_formatted_text (pretty_printer
*pp
)
623 output_buffer
*buffer
= pp_buffer (pp
);
624 struct chunk_info
*chunk_array
= buffer
->cur_chunk_array
;
625 const char **args
= chunk_array
->args
;
627 gcc_assert (buffer
->obstack
== &buffer
->formatted_obstack
);
628 gcc_assert (buffer
->line_length
== 0);
630 /* This is the third phase; the first two phases are done in pp_base_format.
631 Now we actually print it. */
632 for (chunk
= 0; args
[chunk
]; chunk
++)
633 pp_string (pp
, args
[chunk
]);
635 /* Deallocate the chunk structure and everything after it (i.e. the
636 associated series of formatted strings). */
637 buffer
->cur_chunk_array
= chunk_array
->prev
;
638 obstack_free (&buffer
->chunk_obstack
, chunk_array
);
641 /* Helper subroutine of output_verbatim and verbatim. Do the appropriate
642 settings needed by BUFFER for a verbatim formatting. */
644 pp_base_format_verbatim (pretty_printer
*pp
, text_info
*text
)
646 /* Set verbatim mode. */
647 pp_wrapping_mode_t oldmode
= pp_set_verbatim_wrapping (pp
);
649 /* Do the actual formatting. */
650 pp_format (pp
, text
);
651 pp_output_formatted_text (pp
);
653 /* Restore previous settings. */
654 pp_wrapping_mode (pp
) = oldmode
;
657 /* Flush the content of BUFFER onto the attached stream. */
659 pp_base_flush (pretty_printer
*pp
)
661 pp_write_text_to_stream (pp
);
663 fflush (pp
->buffer
->stream
);
666 /* Set the maximum number of characters per line PRETTY-PRINTER can
667 output in line-wrapping mode. A LENGTH value 0 suppresses
670 pp_base_set_line_maximum_length (pretty_printer
*pp
, int length
)
672 pp_line_cutoff (pp
) = length
;
673 pp_set_real_maximum_length (pp
);
676 /* Clear PRETTY-PRINTER output area text info. */
678 pp_base_clear_output_area (pretty_printer
*pp
)
680 obstack_free (pp
->buffer
->obstack
, obstack_base (pp
->buffer
->obstack
));
681 pp
->buffer
->line_length
= 0;
684 /* Set PREFIX for PRETTY-PRINTER. */
686 pp_base_set_prefix (pretty_printer
*pp
, const char *prefix
)
689 pp_set_real_maximum_length (pp
);
690 pp
->emitted_prefix
= false;
691 pp_indentation (pp
) = 0;
694 /* Free PRETTY-PRINTER's prefix, a previously malloc()'d string. */
696 pp_base_destroy_prefix (pretty_printer
*pp
)
698 if (pp
->prefix
!= NULL
)
700 free (CONST_CAST (char *, pp
->prefix
));
705 /* Write out PRETTY-PRINTER's prefix. */
707 pp_base_emit_prefix (pretty_printer
*pp
)
709 if (pp
->prefix
!= NULL
)
711 switch (pp_prefixing_rule (pp
))
714 case DIAGNOSTICS_SHOW_PREFIX_NEVER
:
717 case DIAGNOSTICS_SHOW_PREFIX_ONCE
:
718 if (pp
->emitted_prefix
)
723 pp_indentation (pp
) += 3;
726 case DIAGNOSTICS_SHOW_PREFIX_EVERY_LINE
:
728 int prefix_length
= strlen (pp
->prefix
);
729 pp_append_r (pp
, pp
->prefix
, prefix_length
);
730 pp
->emitted_prefix
= true;
737 /* Construct a PRETTY-PRINTER with PREFIX and of MAXIMUM_LENGTH
738 characters per line. */
740 pp_construct (pretty_printer
*pp
, const char *prefix
, int maximum_length
)
742 memset (pp
, 0, sizeof (pretty_printer
));
743 pp
->buffer
= XCNEW (output_buffer
);
744 obstack_init (&pp
->buffer
->chunk_obstack
);
745 obstack_init (&pp
->buffer
->formatted_obstack
);
746 pp
->buffer
->obstack
= &pp
->buffer
->formatted_obstack
;
747 pp
->buffer
->stream
= stderr
;
748 pp_line_cutoff (pp
) = maximum_length
;
749 pp_prefixing_rule (pp
) = DIAGNOSTICS_SHOW_PREFIX_ONCE
;
750 pp_set_prefix (pp
, prefix
);
751 pp_translate_identifiers (pp
) = true;
754 /* Append a string delimited by START and END to the output area of
755 PRETTY-PRINTER. No line wrapping is done. However, if beginning a
756 new line then emit PRETTY-PRINTER's prefix and skip any leading
757 whitespace if appropriate. The caller must ensure that it is
760 pp_base_append_text (pretty_printer
*pp
, const char *start
, const char *end
)
762 /* Emit prefix and skip whitespace if we're starting a new line. */
763 if (pp
->buffer
->line_length
== 0)
766 if (pp_is_wrapping_line (pp
))
767 while (start
!= end
&& *start
== ' ')
770 pp_append_r (pp
, start
, end
- start
);
773 /* Finishes constructing a NUL-terminated character string representing
774 the PRETTY-PRINTED text. */
776 pp_base_formatted_text (pretty_printer
*pp
)
778 obstack_1grow (pp
->buffer
->obstack
, '\0');
779 return pp_formatted_text_data (pp
);
782 /* Return a pointer to the last character emitted in PRETTY-PRINTER's
783 output area. A NULL pointer means no character available. */
785 pp_base_last_position_in_text (const pretty_printer
*pp
)
787 const char *p
= NULL
;
788 struct obstack
*text
= pp
->buffer
->obstack
;
790 if (obstack_base (text
) != obstack_next_free (text
))
791 p
= ((const char *) obstack_next_free (text
)) - 1;
795 /* Return the number of characters PRETTY-PRINTER can still accept to
796 make a full line. Meaningful only in line-wrapping mode. */
798 pp_base_remaining_character_count_for_line (pretty_printer
*pp
)
800 return pp
->maximum_length
- pp
->buffer
->line_length
;
804 /* Format a message into BUFFER a la printf. */
806 pp_printf (pretty_printer
*pp
, const char *msg
, ...)
814 text
.format_spec
= msg
;
816 pp_format (pp
, &text
);
817 pp_output_formatted_text (pp
);
822 /* Output MESSAGE verbatim into BUFFER. */
824 pp_verbatim (pretty_printer
*pp
, const char *msg
, ...)
832 text
.format_spec
= msg
;
834 pp_format_verbatim (pp
, &text
);
840 /* Have PRETTY-PRINTER start a new line. */
842 pp_base_newline (pretty_printer
*pp
)
844 obstack_1grow (pp
->buffer
->obstack
, '\n');
845 pp_needs_newline (pp
) = false;
846 pp
->buffer
->line_length
= 0;
849 /* Have PRETTY-PRINTER add a CHARACTER. */
851 pp_base_character (pretty_printer
*pp
, int c
)
853 if (pp_is_wrapping_line (pp
)
854 && pp_remaining_character_count_for_line (pp
) <= 0)
860 obstack_1grow (pp
->buffer
->obstack
, c
);
861 ++pp
->buffer
->line_length
;
864 /* Append a STRING to the output area of PRETTY-PRINTER; the STRING may
865 be line-wrapped if in appropriate mode. */
867 pp_base_string (pretty_printer
*pp
, const char *str
)
869 pp_maybe_wrap_text (pp
, str
, str
+ (str
? strlen (str
) : 0));
872 /* Maybe print out a whitespace if needed. */
875 pp_base_maybe_space (pretty_printer
*pp
)
877 if (pp_base (pp
)->padding
!= pp_none
)
880 pp_base (pp
)->padding
= pp_none
;
884 /* The string starting at P has LEN (at least 1) bytes left; if they
885 start with a valid UTF-8 sequence, return the length of that
886 sequence and set *VALUE to the value of that sequence, and
887 otherwise return 0 and set *VALUE to (unsigned int) -1. */
890 decode_utf8_char (const unsigned char *p
, size_t len
, unsigned int *value
)
901 for (t
= *p
; t
& 0x80; t
<<= 1)
904 if (utf8_len
> len
|| utf8_len
< 2 || utf8_len
> 6)
906 *value
= (unsigned int) -1;
909 ch
= *p
& ((1 << (7 - utf8_len
)) - 1);
910 for (i
= 1; i
< utf8_len
; i
++)
912 unsigned int u
= p
[i
];
913 if ((u
& 0xC0) != 0x80)
915 *value
= (unsigned int) -1;
918 ch
= (ch
<< 6) | (u
& 0x3F);
920 if ( (ch
<= 0x7F && utf8_len
> 1)
921 || (ch
<= 0x7FF && utf8_len
> 2)
922 || (ch
<= 0xFFFF && utf8_len
> 3)
923 || (ch
<= 0x1FFFFF && utf8_len
> 4)
924 || (ch
<= 0x3FFFFFF && utf8_len
> 5)
925 || (ch
>= 0xD800 && ch
<= 0xDFFF))
927 *value
= (unsigned int) -1;
940 /* Allocator for identifier_to_locale and corresponding function to
943 void *(*identifier_to_locale_alloc
) (size_t) = xmalloc
;
944 void (*identifier_to_locale_free
) (void *) = free
;
946 /* Given IDENT, an identifier in the internal encoding, return a
947 version of IDENT suitable for diagnostics in the locale character
948 set: either IDENT itself, or a string, allocated using
949 identifier_to_locale_alloc, converted to the locale character set
950 and using escape sequences if not representable in the locale
951 character set or containing control characters or invalid byte
952 sequences. Existing backslashes in IDENT are not doubled, so the
953 result may not uniquely specify the contents of an arbitrary byte
954 sequence identifier. */
957 identifier_to_locale (const char *ident
)
959 const unsigned char *uid
= (const unsigned char *) ident
;
960 size_t idlen
= strlen (ident
);
961 bool valid_printable_utf8
= true;
962 bool all_ascii
= true;
965 for (i
= 0; i
< idlen
;)
968 size_t utf8_len
= decode_utf8_char (&uid
[i
], idlen
- i
, &c
);
969 if (utf8_len
== 0 || c
<= 0x1F || (c
>= 0x7F && c
<= 0x9F))
971 valid_printable_utf8
= false;
979 /* If IDENT contains invalid UTF-8 sequences (which may occur with
980 attributes putting arbitrary byte sequences in identifiers), or
981 control characters, we use octal escape sequences for all bytes
982 outside printable ASCII. */
983 if (!valid_printable_utf8
)
985 char *ret
= (char *) identifier_to_locale_alloc (4 * idlen
+ 1);
987 for (i
= 0; i
< idlen
; i
++)
989 if (uid
[i
] > 0x1F && uid
[i
] < 0x7F)
993 sprintf (p
, "\\%03o", uid
[i
]);
1001 /* Otherwise, if it is valid printable ASCII, or printable UTF-8
1002 with the locale character set being UTF-8, IDENT is used. */
1003 if (all_ascii
|| locale_utf8
)
1006 /* Otherwise IDENT is converted to the locale character set if
1008 #if defined ENABLE_NLS && defined HAVE_LANGINFO_CODESET && HAVE_ICONV
1009 if (locale_encoding
!= NULL
)
1011 iconv_t cd
= iconv_open (locale_encoding
, "UTF-8");
1012 bool conversion_ok
= true;
1014 if (cd
!= (iconv_t
) -1)
1016 size_t ret_alloc
= 4 * idlen
+ 1;
1019 /* Repeat the whole conversion process as needed with
1020 larger buffers so non-reversible transformations can
1021 always be detected. */
1022 ICONV_CONST
char *inbuf
= CONST_CAST (char *, ident
);
1024 size_t inbytesleft
= idlen
;
1025 size_t outbytesleft
= ret_alloc
- 1;
1028 ret
= (char *) identifier_to_locale_alloc (ret_alloc
);
1031 if (iconv (cd
, 0, 0, 0, 0) == (size_t) -1)
1033 conversion_ok
= false;
1037 iconv_ret
= iconv (cd
, &inbuf
, &inbytesleft
,
1038 &outbuf
, &outbytesleft
);
1039 if (iconv_ret
== (size_t) -1 || inbytesleft
!= 0)
1044 identifier_to_locale_free (ret
);
1050 conversion_ok
= false;
1054 else if (iconv_ret
!= 0)
1056 conversion_ok
= false;
1059 /* Return to initial shift state. */
1060 if (iconv (cd
, 0, 0, &outbuf
, &outbytesleft
) == (size_t) -1)
1065 identifier_to_locale_free (ret
);
1071 conversion_ok
= false;
1085 /* Otherwise, convert non-ASCII characters in IDENT to UCNs. */
1087 char *ret
= (char *) identifier_to_locale_alloc (10 * idlen
+ 1);
1089 for (i
= 0; i
< idlen
;)
1092 size_t utf8_len
= decode_utf8_char (&uid
[i
], idlen
- i
, &c
);
1097 sprintf (p
, "\\U%08x", c
);