2011-02-06 Paul Thomas <pault@gcc.gnu.org>
[official-gcc.git] / gcc / pretty-print.c
blob8aa99787c59ab5fe5fa77d34f5498f74ca6a0a10
1 /* Various declarations for language-independent pretty-print subroutines.
2 Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Gabriel Dos Reis <gdr@integrable-solutions.net>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "intl.h"
26 #include "pretty-print.h"
28 #if HAVE_ICONV
29 #include <iconv.h>
30 #endif
/* A pointer to the formatted diagnostic message: the base address of the
   obstack PP is currently writing into, viewed as a character string.  */
#define pp_formatted_text_data(PP) \
   ((const char *) obstack_base (pp_base (PP)->buffer->obstack))
/* Format an integer given by va_arg (ARG, type-specifier T) where
   type-specifier is a precision modifier as indicated by PREC.  F is
   a string used to construct the appropriate format-specifier.

   PREC selects the width: 0 is plain T, 1 is `long T', 2 is
   `long long T'.  Any other value prints nothing and consumes no
   argument.  */
#define pp_integer_with_precision(PP, ARG, PREC, T, F)       \
  do                                                         \
    switch (PREC)                                            \
      {                                                      \
      case 0:                                                \
        pp_scalar (PP, "%" F, va_arg (ARG, T));              \
        break;                                               \
                                                             \
      case 1:                                                \
        pp_scalar (PP, "%l" F, va_arg (ARG, long T));        \
        break;                                               \
                                                             \
      case 2:                                                \
        pp_scalar (PP, "%" HOST_LONG_LONG_FORMAT F, va_arg (ARG, long long T));  \
        break;                                               \
                                                             \
      default:                                               \
        break;                                               \
      }                                                      \
  while (0)
61 /* Subroutine of pp_set_maximum_length. Set up PRETTY-PRINTER's
62 internal maximum characters per line. */
63 static void
64 pp_set_real_maximum_length (pretty_printer *pp)
66 /* If we're told not to wrap lines then do the obvious thing. In case
67 we'll emit prefix only once per message, it is appropriate
68 not to increase unnecessarily the line-length cut-off. */
69 if (!pp_is_wrapping_line (pp)
70 || pp_prefixing_rule (pp) == DIAGNOSTICS_SHOW_PREFIX_ONCE
71 || pp_prefixing_rule (pp) == DIAGNOSTICS_SHOW_PREFIX_NEVER)
72 pp->maximum_length = pp_line_cutoff (pp);
73 else
75 int prefix_length = pp->prefix ? strlen (pp->prefix) : 0;
76 /* If the prefix is ridiculously too long, output at least
77 32 characters. */
78 if (pp_line_cutoff (pp) - prefix_length < 32)
79 pp->maximum_length = pp_line_cutoff (pp) + 32;
80 else
81 pp->maximum_length = pp_line_cutoff (pp);
85 /* Clear PRETTY-PRINTER's output state. */
86 static inline void
87 pp_clear_state (pretty_printer *pp)
89 pp->emitted_prefix = false;
90 pp_indentation (pp) = 0;
93 /* Flush the formatted text of PRETTY-PRINTER onto the attached stream. */
94 void
95 pp_write_text_to_stream (pretty_printer *pp)
97 const char *text = pp_formatted_text (pp);
98 fputs (text, pp->buffer->stream);
99 pp_clear_output_area (pp);
102 /* Wrap a text delimited by START and END into PRETTY-PRINTER. */
103 static void
104 pp_wrap_text (pretty_printer *pp, const char *start, const char *end)
106 bool wrapping_line = pp_is_wrapping_line (pp);
108 while (start != end)
110 /* Dump anything bordered by whitespaces. */
112 const char *p = start;
113 while (p != end && !ISBLANK (*p) && *p != '\n')
114 ++p;
115 if (wrapping_line
116 && p - start >= pp_remaining_character_count_for_line (pp))
117 pp_newline (pp);
118 pp_append_text (pp, start, p);
119 start = p;
122 if (start != end && ISBLANK (*start))
124 pp_space (pp);
125 ++start;
127 if (start != end && *start == '\n')
129 pp_newline (pp);
130 ++start;
135 /* Same as pp_wrap_text but wrap text only when in line-wrapping mode. */
136 static inline void
137 pp_maybe_wrap_text (pretty_printer *pp, const char *start, const char *end)
139 if (pp_is_wrapping_line (pp))
140 pp_wrap_text (pp, start, end);
141 else
142 pp_append_text (pp, start, end);
145 /* Append to the output area of PRETTY-PRINTER a string specified by its
146 STARTing character and LENGTH. */
147 static inline void
148 pp_append_r (pretty_printer *pp, const char *start, int length)
150 obstack_grow (pp->buffer->obstack, start, length);
151 pp->buffer->line_length += length;
154 /* Insert enough spaces into the output area of PRETTY-PRINTER to bring
155 the column position to the current indentation level, assuming that a
156 newline has just been written to the buffer. */
157 void
158 pp_base_indent (pretty_printer *pp)
160 int n = pp_indentation (pp);
161 int i;
163 for (i = 0; i < n; ++i)
164 pp_space (pp);
167 /* The following format specifiers are recognized as being client independent:
168 %d, %i: (signed) integer in base ten.
169 %u: unsigned integer in base ten.
170 %o: unsigned integer in base eight.
171 %x: unsigned integer in base sixteen.
172 %ld, %li, %lo, %lu, %lx: long versions of the above.
173 %lld, %lli, %llo, %llu, %llx: long long versions.
174 %wd, %wi, %wo, %wu, %wx: HOST_WIDE_INT versions.
175 %c: character.
176 %s: string.
177 %p: pointer.
178 %m: strerror(text->err_no) - does not consume a value from args_ptr.
179 %%: '%'.
180 %<: opening quote.
181 %>: closing quote.
182 %': apostrophe (should only be used in untranslated messages;
183 translations should use appropriate punctuation directly).
184 %.*s: a substring the length of which is specified by an argument
185 integer.
186 %Ns: likewise, but length specified as constant in the format string.
187 Flag 'q': quote formatted text (must come immediately after '%').
189 Arguments can be used sequentially, or through %N$ resp. *N$
190 notation Nth argument after the format string. If %N$ / *N$
191 notation is used, it must be used for all arguments, except %m, %%,
192 %<, %> and %', which may not have a number, as they do not consume
193 an argument. When %M$.*N$s is used, M must be N + 1. (This may
194 also be written %M$.*s, provided N is not otherwise used.) The
195 format string must have conversion specifiers with argument numbers
196 1 up to highest argument; each argument may only be used once.
197 A format string can have at most 30 arguments. */
199 /* Formatting phases 1 and 2: render TEXT->format_spec plus
200 TEXT->args_ptr into a series of chunks in PP->buffer->args[].
201 Phase 3 is in pp_base_format_text. */
203 void
204 pp_base_format (pretty_printer *pp, text_info *text)
206 output_buffer *buffer = pp->buffer;
207 const char *p;
208 const char **args;
209 struct chunk_info *new_chunk_array;
211 unsigned int curarg = 0, chunk = 0, argno;
212 pp_wrapping_mode_t old_wrapping_mode;
213 bool any_unnumbered = false, any_numbered = false;
214 const char **formatters[PP_NL_ARGMAX];
216 /* Allocate a new chunk structure. */
217 new_chunk_array = XOBNEW (&buffer->chunk_obstack, struct chunk_info);
218 new_chunk_array->prev = buffer->cur_chunk_array;
219 buffer->cur_chunk_array = new_chunk_array;
220 args = new_chunk_array->args;
222 /* Formatting phase 1: split up TEXT->format_spec into chunks in
223 PP->buffer->args[]. Even-numbered chunks are to be output
224 verbatim, odd-numbered chunks are format specifiers.
225 %m, %%, %<, %>, and %' are replaced with the appropriate text at
226 this point. */
228 memset (formatters, 0, sizeof formatters);
230 for (p = text->format_spec; *p; )
232 while (*p != '\0' && *p != '%')
234 obstack_1grow (&buffer->chunk_obstack, *p);
235 p++;
238 if (*p == '\0')
239 break;
241 switch (*++p)
243 case '\0':
244 gcc_unreachable ();
246 case '%':
247 obstack_1grow (&buffer->chunk_obstack, '%');
248 p++;
249 continue;
251 case '<':
252 obstack_grow (&buffer->chunk_obstack,
253 open_quote, strlen (open_quote));
254 p++;
255 continue;
257 case '>':
258 case '\'':
259 obstack_grow (&buffer->chunk_obstack,
260 close_quote, strlen (close_quote));
261 p++;
262 continue;
264 case 'm':
266 const char *errstr = xstrerror (text->err_no);
267 obstack_grow (&buffer->chunk_obstack, errstr, strlen (errstr));
269 p++;
270 continue;
272 default:
273 /* Handled in phase 2. Terminate the plain chunk here. */
274 obstack_1grow (&buffer->chunk_obstack, '\0');
275 gcc_assert (chunk < PP_NL_ARGMAX * 2);
276 args[chunk++] = XOBFINISH (&buffer->chunk_obstack, const char *);
277 break;
280 if (ISDIGIT (*p))
282 char *end;
283 argno = strtoul (p, &end, 10) - 1;
284 p = end;
285 gcc_assert (*p == '$');
286 p++;
288 any_numbered = true;
289 gcc_assert (!any_unnumbered);
291 else
293 argno = curarg++;
294 any_unnumbered = true;
295 gcc_assert (!any_numbered);
297 gcc_assert (argno < PP_NL_ARGMAX);
298 gcc_assert (!formatters[argno]);
299 formatters[argno] = &args[chunk];
302 obstack_1grow (&buffer->chunk_obstack, *p);
303 p++;
305 while (strchr ("qwl+#", p[-1]));
307 if (p[-1] == '.')
309 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
310 (where M == N + 1). */
311 if (ISDIGIT (*p))
315 obstack_1grow (&buffer->chunk_obstack, *p);
316 p++;
318 while (ISDIGIT (p[-1]));
319 gcc_assert (p[-1] == 's');
321 else
323 gcc_assert (*p == '*');
324 obstack_1grow (&buffer->chunk_obstack, '*');
325 p++;
327 if (ISDIGIT (*p))
329 char *end;
330 unsigned int argno2 = strtoul (p, &end, 10) - 1;
331 p = end;
332 gcc_assert (argno2 == argno - 1);
333 gcc_assert (!any_unnumbered);
334 gcc_assert (*p == '$');
336 p++;
337 formatters[argno2] = formatters[argno];
339 else
341 gcc_assert (!any_numbered);
342 formatters[argno+1] = formatters[argno];
343 curarg++;
345 gcc_assert (*p == 's');
346 obstack_1grow (&buffer->chunk_obstack, 's');
347 p++;
350 if (*p == '\0')
351 break;
353 obstack_1grow (&buffer->chunk_obstack, '\0');
354 gcc_assert (chunk < PP_NL_ARGMAX * 2);
355 args[chunk++] = XOBFINISH (&buffer->chunk_obstack, const char *);
358 obstack_1grow (&buffer->chunk_obstack, '\0');
359 gcc_assert (chunk < PP_NL_ARGMAX * 2);
360 args[chunk++] = XOBFINISH (&buffer->chunk_obstack, const char *);
361 args[chunk] = 0;
363 /* Set output to the argument obstack, and switch line-wrapping and
364 prefixing off. */
365 buffer->obstack = &buffer->chunk_obstack;
366 old_wrapping_mode = pp_set_verbatim_wrapping (pp);
368 /* Second phase. Replace each formatter with the formatted text it
369 corresponds to. */
371 for (argno = 0; formatters[argno]; argno++)
373 int precision = 0;
374 bool wide = false;
375 bool plus = false;
376 bool hash = false;
377 bool quote = false;
379 /* We do not attempt to enforce any ordering on the modifier
380 characters. */
382 for (p = *formatters[argno];; p++)
384 switch (*p)
386 case 'q':
387 gcc_assert (!quote);
388 quote = true;
389 continue;
391 case '+':
392 gcc_assert (!plus);
393 plus = true;
394 continue;
396 case '#':
397 gcc_assert (!hash);
398 hash = true;
399 continue;
401 case 'w':
402 gcc_assert (!wide);
403 wide = true;
404 continue;
406 case 'l':
407 /* We don't support precision beyond that of "long long". */
408 gcc_assert (precision < 2);
409 precision++;
410 continue;
412 break;
415 gcc_assert (!wide || precision == 0);
417 if (quote)
418 pp_string (pp, open_quote);
420 switch (*p)
422 case 'c':
423 pp_character (pp, va_arg (*text->args_ptr, int));
424 break;
426 case 'd':
427 case 'i':
428 if (wide)
429 pp_wide_integer (pp, va_arg (*text->args_ptr, HOST_WIDE_INT));
430 else
431 pp_integer_with_precision
432 (pp, *text->args_ptr, precision, int, "d");
433 break;
435 case 'o':
436 if (wide)
437 pp_scalar (pp, "%" HOST_WIDE_INT_PRINT "o",
438 va_arg (*text->args_ptr, unsigned HOST_WIDE_INT));
439 else
440 pp_integer_with_precision
441 (pp, *text->args_ptr, precision, unsigned, "o");
442 break;
444 case 's':
445 pp_string (pp, va_arg (*text->args_ptr, const char *));
446 break;
448 case 'p':
449 pp_pointer (pp, va_arg (*text->args_ptr, void *));
450 break;
452 case 'u':
453 if (wide)
454 pp_scalar (pp, HOST_WIDE_INT_PRINT_UNSIGNED,
455 va_arg (*text->args_ptr, unsigned HOST_WIDE_INT));
456 else
457 pp_integer_with_precision
458 (pp, *text->args_ptr, precision, unsigned, "u");
459 break;
461 case 'x':
462 if (wide)
463 pp_scalar (pp, HOST_WIDE_INT_PRINT_HEX,
464 va_arg (*text->args_ptr, unsigned HOST_WIDE_INT));
465 else
466 pp_integer_with_precision
467 (pp, *text->args_ptr, precision, unsigned, "x");
468 break;
470 case '.':
472 int n;
473 const char *s;
475 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
476 (where M == N + 1). The format string should be verified
477 already from the first phase. */
478 p++;
479 if (ISDIGIT (*p))
481 char *end;
482 n = strtoul (p, &end, 10);
483 p = end;
484 gcc_assert (*p == 's');
486 else
488 gcc_assert (*p == '*');
489 p++;
490 gcc_assert (*p == 's');
491 n = va_arg (*text->args_ptr, int);
493 /* This consumes a second entry in the formatters array. */
494 gcc_assert (formatters[argno] == formatters[argno+1]);
495 argno++;
498 s = va_arg (*text->args_ptr, const char *);
499 pp_append_text (pp, s, s + n);
501 break;
503 default:
505 bool ok;
507 gcc_assert (pp_format_decoder (pp));
508 ok = pp_format_decoder (pp) (pp, text, p,
509 precision, wide, plus, hash);
510 gcc_assert (ok);
514 if (quote)
515 pp_string (pp, close_quote);
517 obstack_1grow (&buffer->chunk_obstack, '\0');
518 *formatters[argno] = XOBFINISH (&buffer->chunk_obstack, const char *);
521 #ifdef ENABLE_CHECKING
522 for (; argno < PP_NL_ARGMAX; argno++)
523 gcc_assert (!formatters[argno]);
524 #endif
526 /* Revert to normal obstack and wrapping mode. */
527 buffer->obstack = &buffer->formatted_obstack;
528 buffer->line_length = 0;
529 pp_wrapping_mode (pp) = old_wrapping_mode;
530 pp_clear_state (pp);
533 /* Format of a message pointed to by TEXT. */
534 void
535 pp_base_output_formatted_text (pretty_printer *pp)
537 unsigned int chunk;
538 output_buffer *buffer = pp_buffer (pp);
539 struct chunk_info *chunk_array = buffer->cur_chunk_array;
540 const char **args = chunk_array->args;
542 gcc_assert (buffer->obstack == &buffer->formatted_obstack);
543 gcc_assert (buffer->line_length == 0);
545 /* This is a third phase, first 2 phases done in pp_base_format_args.
546 Now we actually print it. */
547 for (chunk = 0; args[chunk]; chunk++)
548 pp_string (pp, args[chunk]);
550 /* Deallocate the chunk structure and everything after it (i.e. the
551 associated series of formatted strings). */
552 buffer->cur_chunk_array = chunk_array->prev;
553 obstack_free (&buffer->chunk_obstack, chunk_array);
556 /* Helper subroutine of output_verbatim and verbatim. Do the appropriate
557 settings needed by BUFFER for a verbatim formatting. */
558 void
559 pp_base_format_verbatim (pretty_printer *pp, text_info *text)
561 /* Set verbatim mode. */
562 pp_wrapping_mode_t oldmode = pp_set_verbatim_wrapping (pp);
564 /* Do the actual formatting. */
565 pp_format (pp, text);
566 pp_output_formatted_text (pp);
568 /* Restore previous settings. */
569 pp_wrapping_mode (pp) = oldmode;
572 /* Flush the content of BUFFER onto the attached stream. */
573 void
574 pp_base_flush (pretty_printer *pp)
576 pp_write_text_to_stream (pp);
577 pp_clear_state (pp);
578 fputc ('\n', pp->buffer->stream);
579 fflush (pp->buffer->stream);
580 pp_needs_newline (pp) = false;
583 /* Sets the number of maximum characters per line PRETTY-PRINTER can
584 output in line-wrapping mode. A LENGTH value 0 suppresses
585 line-wrapping. */
586 void
587 pp_base_set_line_maximum_length (pretty_printer *pp, int length)
589 pp_line_cutoff (pp) = length;
590 pp_set_real_maximum_length (pp);
593 /* Clear PRETTY-PRINTER output area text info. */
594 void
595 pp_base_clear_output_area (pretty_printer *pp)
597 obstack_free (pp->buffer->obstack, obstack_base (pp->buffer->obstack));
598 pp->buffer->line_length = 0;
601 /* Set PREFIX for PRETTY-PRINTER. */
602 void
603 pp_base_set_prefix (pretty_printer *pp, const char *prefix)
605 pp->prefix = prefix;
606 pp_set_real_maximum_length (pp);
607 pp->emitted_prefix = false;
608 pp_indentation (pp) = 0;
611 /* Free PRETTY-PRINTER's prefix, a previously malloc()'d string. */
612 void
613 pp_base_destroy_prefix (pretty_printer *pp)
615 if (pp->prefix != NULL)
617 free (CONST_CAST (char *, pp->prefix));
618 pp->prefix = NULL;
622 /* Write out PRETTY-PRINTER's prefix. */
623 void
624 pp_base_emit_prefix (pretty_printer *pp)
626 if (pp->prefix != NULL)
628 switch (pp_prefixing_rule (pp))
630 default:
631 case DIAGNOSTICS_SHOW_PREFIX_NEVER:
632 break;
634 case DIAGNOSTICS_SHOW_PREFIX_ONCE:
635 if (pp->emitted_prefix)
637 pp_base_indent (pp);
638 break;
640 pp_indentation (pp) += 3;
641 /* Fall through. */
643 case DIAGNOSTICS_SHOW_PREFIX_EVERY_LINE:
645 int prefix_length = strlen (pp->prefix);
646 pp_append_r (pp, pp->prefix, prefix_length);
647 pp->emitted_prefix = true;
649 break;
654 /* Construct a PRETTY-PRINTER with PREFIX and of MAXIMUM_LENGTH
655 characters per line. */
656 void
657 pp_construct (pretty_printer *pp, const char *prefix, int maximum_length)
659 memset (pp, 0, sizeof (pretty_printer));
660 pp->buffer = XCNEW (output_buffer);
661 obstack_init (&pp->buffer->chunk_obstack);
662 obstack_init (&pp->buffer->formatted_obstack);
663 pp->buffer->obstack = &pp->buffer->formatted_obstack;
664 pp->buffer->stream = stderr;
665 pp_line_cutoff (pp) = maximum_length;
666 pp_prefixing_rule (pp) = DIAGNOSTICS_SHOW_PREFIX_ONCE;
667 pp_set_prefix (pp, prefix);
668 pp_translate_identifiers (pp) = true;
671 /* Append a string delimited by START and END to the output area of
672 PRETTY-PRINTER. No line wrapping is done. However, if beginning a
673 new line then emit PRETTY-PRINTER's prefix and skip any leading
674 whitespace if appropriate. The caller must ensure that it is
675 safe to do so. */
676 void
677 pp_base_append_text (pretty_printer *pp, const char *start, const char *end)
679 /* Emit prefix and skip whitespace if we're starting a new line. */
680 if (pp->buffer->line_length == 0)
682 pp_emit_prefix (pp);
683 if (pp_is_wrapping_line (pp))
684 while (start != end && *start == ' ')
685 ++start;
687 pp_append_r (pp, start, end - start);
690 /* Finishes constructing a NULL-terminated character string representing
691 the PRETTY-PRINTED text. */
692 const char *
693 pp_base_formatted_text (pretty_printer *pp)
695 obstack_1grow (pp->buffer->obstack, '\0');
696 return pp_formatted_text_data (pp);
699 /* Return a pointer to the last character emitted in PRETTY-PRINTER's
700 output area. A NULL pointer means no character available. */
701 const char *
702 pp_base_last_position_in_text (const pretty_printer *pp)
704 const char *p = NULL;
705 struct obstack *text = pp->buffer->obstack;
707 if (obstack_base (text) != obstack_next_free (text))
708 p = ((const char *) obstack_next_free (text)) - 1;
709 return p;
712 /* Return the amount of characters PRETTY-PRINTER can accept to
713 make a full line. Meaningful only in line-wrapping mode. */
715 pp_base_remaining_character_count_for_line (pretty_printer *pp)
717 return pp->maximum_length - pp->buffer->line_length;
721 /* Format a message into BUFFER a la printf. */
722 void
723 pp_printf (pretty_printer *pp, const char *msg, ...)
725 text_info text;
726 va_list ap;
728 va_start (ap, msg);
729 text.err_no = errno;
730 text.args_ptr = &ap;
731 text.format_spec = msg;
732 text.locus = NULL;
733 pp_format (pp, &text);
734 pp_output_formatted_text (pp);
735 va_end (ap);
739 /* Output MESSAGE verbatim into BUFFER. */
740 void
741 pp_verbatim (pretty_printer *pp, const char *msg, ...)
743 text_info text;
744 va_list ap;
746 va_start (ap, msg);
747 text.err_no = errno;
748 text.args_ptr = &ap;
749 text.format_spec = msg;
750 text.locus = NULL;
751 pp_format_verbatim (pp, &text);
752 va_end (ap);
757 /* Have PRETTY-PRINTER start a new line. */
758 void
759 pp_base_newline (pretty_printer *pp)
761 obstack_1grow (pp->buffer->obstack, '\n');
762 pp->buffer->line_length = 0;
765 /* Have PRETTY-PRINTER add a CHARACTER. */
766 void
767 pp_base_character (pretty_printer *pp, int c)
769 if (pp_is_wrapping_line (pp)
770 && pp_remaining_character_count_for_line (pp) <= 0)
772 pp_newline (pp);
773 if (ISSPACE (c))
774 return;
776 obstack_1grow (pp->buffer->obstack, c);
777 ++pp->buffer->line_length;
780 /* Append a STRING to the output area of PRETTY-PRINTER; the STRING may
781 be line-wrapped if in appropriate mode. */
782 void
783 pp_base_string (pretty_printer *pp, const char *str)
785 pp_maybe_wrap_text (pp, str, str + (str ? strlen (str) : 0));
788 /* Maybe print out a whitespace if needed. */
790 void
791 pp_base_maybe_space (pretty_printer *pp)
793 if (pp_base (pp)->padding != pp_none)
795 pp_space (pp);
796 pp_base (pp)->padding = pp_none;
/* The string starting at P has LEN (at least 1) bytes left; if they
   start with a valid UTF-8 sequence, return the length of that
   sequence and set *VALUE to the value of that sequence, and
   otherwise return 0 and set *VALUE to (unsigned int) -1.

   Sequences of up to six bytes are accepted.  Overlong encodings,
   truncated sequences, stray continuation bytes and the surrogate
   range 0xD800-0xDFFF are rejected.  Calling this with LEN == 0
   aborts.  */

static int
decode_utf8_char (const unsigned char *p, size_t len, unsigned int *value)
{
  unsigned int t;

  /* Check LEN before touching *P: with no bytes left there is nothing
     that may legitimately be read.  */
  if (len == 0)
    abort ();

  t = *p;
  if (t & 0x80)
    {
      size_t utf8_len = 0;
      unsigned int ch;
      size_t i;

      /* The number of leading one bits in the first byte is the length
         of the whole sequence.  */
      for (t = *p; t & 0x80; t <<= 1)
        utf8_len++;

      if (utf8_len > len || utf8_len < 2 || utf8_len > 6)
        {
          *value = (unsigned int) -1;
          return 0;
        }
      /* Payload bits of the lead byte.  */
      ch = *p & ((1 << (7 - utf8_len)) - 1);
      for (i = 1; i < utf8_len; i++)
        {
          unsigned int u = p[i];
          /* Every following byte must be a continuation byte 10xxxxxx.  */
          if ((u & 0xC0) != 0x80)
            {
              *value = (unsigned int) -1;
              return 0;
            }
          ch = (ch << 6) | (u & 0x3F);
        }
      /* Reject overlong encodings and surrogates.  */
      if (   (ch <=      0x7F && utf8_len > 1)
          || (ch <=     0x7FF && utf8_len > 2)
          || (ch <=    0xFFFF && utf8_len > 3)
          || (ch <=  0x1FFFFF && utf8_len > 4)
          || (ch <= 0x3FFFFFF && utf8_len > 5)
          || (ch >= 0xD800 && ch <= 0xDFFF))
        {
          *value = (unsigned int) -1;
          return 0;
        }
      *value = ch;
      return utf8_len;
    }
  else
    {
      /* Plain ASCII byte.  */
      *value = t;
      return 1;
    }
}
856 /* Allocator for identifier_to_locale and corresponding function to
857 free memory. */
859 void *(*identifier_to_locale_alloc) (size_t) = xmalloc;
860 void (*identifier_to_locale_free) (void *) = free;
862 /* Given IDENT, an identifier in the internal encoding, return a
863 version of IDENT suitable for diagnostics in the locale character
864 set: either IDENT itself, or a string, allocated using
865 identifier_to_locale_alloc, converted to the locale character set
866 and using escape sequences if not representable in the locale
867 character set or containing control characters or invalid byte
868 sequences. Existing backslashes in IDENT are not doubled, so the
869 result may not uniquely specify the contents of an arbitrary byte
870 sequence identifier. */
872 const char *
873 identifier_to_locale (const char *ident)
875 const unsigned char *uid = (const unsigned char *) ident;
876 size_t idlen = strlen (ident);
877 bool valid_printable_utf8 = true;
878 bool all_ascii = true;
879 size_t i;
881 for (i = 0; i < idlen;)
883 unsigned int c;
884 size_t utf8_len = decode_utf8_char (&uid[i], idlen - i, &c);
885 if (utf8_len == 0 || c <= 0x1F || (c >= 0x7F && c <= 0x9F))
887 valid_printable_utf8 = false;
888 break;
890 if (utf8_len > 1)
891 all_ascii = false;
892 i += utf8_len;
895 /* If IDENT contains invalid UTF-8 sequences (which may occur with
896 attributes putting arbitrary byte sequences in identifiers), or
897 control characters, we use octal escape sequences for all bytes
898 outside printable ASCII. */
899 if (!valid_printable_utf8)
901 char *ret = (char *) identifier_to_locale_alloc (4 * idlen + 1);
902 char *p = ret;
903 for (i = 0; i < idlen; i++)
905 if (uid[i] > 0x1F && uid[i] < 0x7F)
906 *p++ = uid[i];
907 else
909 sprintf (p, "\\%03o", uid[i]);
910 p += 4;
913 *p = 0;
914 return ret;
917 /* Otherwise, if it is valid printable ASCII, or printable UTF-8
918 with the locale character set being UTF-8, IDENT is used. */
919 if (all_ascii || locale_utf8)
920 return ident;
922 /* Otherwise IDENT is converted to the locale character set if
923 possible. */
924 #if defined ENABLE_NLS && defined HAVE_LANGINFO_CODESET && HAVE_ICONV
925 if (locale_encoding != NULL)
927 iconv_t cd = iconv_open (locale_encoding, "UTF-8");
928 bool conversion_ok = true;
929 char *ret = NULL;
930 if (cd != (iconv_t) -1)
932 size_t ret_alloc = 4 * idlen + 1;
933 for (;;)
935 /* Repeat the whole conversion process as needed with
936 larger buffers so non-reversible transformations can
937 always be detected. */
938 ICONV_CONST char *inbuf = CONST_CAST (char *, ident);
939 char *outbuf;
940 size_t inbytesleft = idlen;
941 size_t outbytesleft = ret_alloc - 1;
942 size_t iconv_ret;
944 ret = (char *) identifier_to_locale_alloc (ret_alloc);
945 outbuf = ret;
947 if (iconv (cd, 0, 0, 0, 0) == (size_t) -1)
949 conversion_ok = false;
950 break;
953 iconv_ret = iconv (cd, &inbuf, &inbytesleft,
954 &outbuf, &outbytesleft);
955 if (iconv_ret == (size_t) -1 || inbytesleft != 0)
957 if (errno == E2BIG)
959 ret_alloc *= 2;
960 identifier_to_locale_free (ret);
961 ret = NULL;
962 continue;
964 else
966 conversion_ok = false;
967 break;
970 else if (iconv_ret != 0)
972 conversion_ok = false;
973 break;
975 /* Return to initial shift state. */
976 if (iconv (cd, 0, 0, &outbuf, &outbytesleft) == (size_t) -1)
978 if (errno == E2BIG)
980 ret_alloc *= 2;
981 identifier_to_locale_free (ret);
982 ret = NULL;
983 continue;
985 else
987 conversion_ok = false;
988 break;
991 *outbuf = 0;
992 break;
994 iconv_close (cd);
995 if (conversion_ok)
996 return ret;
999 #endif
1001 /* Otherwise, convert non-ASCII characters in IDENT to UCNs. */
1003 char *ret = (char *) identifier_to_locale_alloc (10 * idlen + 1);
1004 char *p = ret;
1005 for (i = 0; i < idlen;)
1007 unsigned int c;
1008 size_t utf8_len = decode_utf8_char (&uid[i], idlen - i, &c);
1009 if (utf8_len == 1)
1010 *p++ = uid[i];
1011 else
1013 sprintf (p, "\\U%08x", c);
1014 p += 10;
1016 i += utf8_len;
1018 *p = 0;
1019 return ret;