2012-11-04 Janus Weil <janus@gcc.gnu.org>
[official-gcc.git] / gcc / pretty-print.c
blobc1282c78a9e9fe6f55441761824c8a03ec4d1e8c
1 /* Various declarations for language-independent pretty-print subroutines.
2 Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Gabriel Dos Reis <gdr@integrable-solutions.net>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "intl.h"
26 #include "pretty-print.h"
28 #if HAVE_ICONV
29 #include <iconv.h>
30 #endif
/* A pointer to the formatted diagnostic message: the base address of
   the obstack PP's buffer is currently writing to.  */
#define pp_formatted_text_data(PP) \
   ((const char *) obstack_base (pp_base (PP)->buffer->obstack))
/* Format an integer given by va_arg (ARG, type-specifier T) where
   type-specifier is a precision modifier as indicated by PREC.  F is
   a string used to construct the appropriate format-specifier.
   PREC is 0 for plain T, 1 for "long T" and 2 for "long long T";
   any other value formats nothing and consumes no argument.  */
#define pp_integer_with_precision(PP, ARG, PREC, T, F)       \
  do                                                         \
    switch (PREC)                                            \
      {                                                      \
      case 0:                                                \
        pp_scalar (PP, "%" F, va_arg (ARG, T));              \
        break;                                               \
                                                             \
      case 1:                                                \
        pp_scalar (PP, "%l" F, va_arg (ARG, long T));        \
        break;                                               \
                                                             \
      case 2:                                                \
        pp_scalar (PP, "%" HOST_LONG_LONG_FORMAT F, va_arg (ARG, long long T));  \
        break;                                               \
                                                             \
      default:                                               \
        break;                                               \
      }                                                      \
  while (0)
61 /* Subroutine of pp_set_maximum_length. Set up PRETTY-PRINTER's
62 internal maximum characters per line. */
63 static void
64 pp_set_real_maximum_length (pretty_printer *pp)
66 /* If we're told not to wrap lines then do the obvious thing. In case
67 we'll emit prefix only once per message, it is appropriate
68 not to increase unnecessarily the line-length cut-off. */
69 if (!pp_is_wrapping_line (pp)
70 || pp_prefixing_rule (pp) == DIAGNOSTICS_SHOW_PREFIX_ONCE
71 || pp_prefixing_rule (pp) == DIAGNOSTICS_SHOW_PREFIX_NEVER)
72 pp->maximum_length = pp_line_cutoff (pp);
73 else
75 int prefix_length = pp->prefix ? strlen (pp->prefix) : 0;
76 /* If the prefix is ridiculously too long, output at least
77 32 characters. */
78 if (pp_line_cutoff (pp) - prefix_length < 32)
79 pp->maximum_length = pp_line_cutoff (pp) + 32;
80 else
81 pp->maximum_length = pp_line_cutoff (pp);
85 /* Clear PRETTY-PRINTER's output state. */
86 static inline void
87 pp_clear_state (pretty_printer *pp)
89 pp->emitted_prefix = false;
90 pp_indentation (pp) = 0;
93 /* Flush the formatted text of PRETTY-PRINTER onto the attached stream. */
94 void
95 pp_write_text_to_stream (pretty_printer *pp)
97 const char *text = pp_formatted_text (pp);
98 fputs (text, pp->buffer->stream);
99 pp_clear_output_area (pp);
102 /* Wrap a text delimited by START and END into PRETTY-PRINTER. */
103 static void
104 pp_wrap_text (pretty_printer *pp, const char *start, const char *end)
106 bool wrapping_line = pp_is_wrapping_line (pp);
108 while (start != end)
110 /* Dump anything bordered by whitespaces. */
112 const char *p = start;
113 while (p != end && !ISBLANK (*p) && *p != '\n')
114 ++p;
115 if (wrapping_line
116 && p - start >= pp_remaining_character_count_for_line (pp))
117 pp_newline (pp);
118 pp_append_text (pp, start, p);
119 start = p;
122 if (start != end && ISBLANK (*start))
124 pp_space (pp);
125 ++start;
127 if (start != end && *start == '\n')
129 pp_newline (pp);
130 ++start;
135 /* Same as pp_wrap_text but wrap text only when in line-wrapping mode. */
136 static inline void
137 pp_maybe_wrap_text (pretty_printer *pp, const char *start, const char *end)
139 if (pp_is_wrapping_line (pp))
140 pp_wrap_text (pp, start, end);
141 else
142 pp_append_text (pp, start, end);
145 /* Append to the output area of PRETTY-PRINTER a string specified by its
146 STARTing character and LENGTH. */
147 static inline void
148 pp_append_r (pretty_printer *pp, const char *start, int length)
150 obstack_grow (pp->buffer->obstack, start, length);
151 pp->buffer->line_length += length;
154 /* Insert enough spaces into the output area of PRETTY-PRINTER to bring
155 the column position to the current indentation level, assuming that a
156 newline has just been written to the buffer. */
157 void
158 pp_base_indent (pretty_printer *pp)
160 int n = pp_indentation (pp);
161 int i;
163 for (i = 0; i < n; ++i)
164 pp_space (pp);
/* The following format specifiers are recognized as being client independent:
   %d, %i: (signed) integer in base ten.
   %u: unsigned integer in base ten.
   %o: unsigned integer in base eight.
   %x: unsigned integer in base sixteen.
   %ld, %li, %lo, %lu, %lx: long versions of the above.
   %lld, %lli, %llo, %llu, %llx: long long versions.
   %wd, %wi, %wo, %wu, %wx: HOST_WIDE_INT versions.
   %c: character.
   %s: string.
   %p: pointer.
   %m: strerror(text->err_no) - does not consume a value from args_ptr.
   %%: '%'.
   %<: opening quote.
   %>: closing quote.
   %': apostrophe (should only be used in untranslated messages;
       translations should use appropriate punctuation directly).
   %.*s: a substring the length of which is specified by an argument
         integer.
   %Ns: likewise, but length specified as constant in the format string.
   Flag 'q': quote formatted text (must come immediately after '%').

   Arguments can be used sequentially, or through %N$ resp. *N$
   notation Nth argument after the format string.  If %N$ / *N$
   notation is used, it must be used for all arguments, except %m, %%,
   %<, %> and %', which may not have a number, as they do not consume
   an argument.  When %M$.*N$s is used, M must be N + 1.  (This may
   also be written %M$.*s, provided N is not otherwise used.)  The
   format string must have conversion specifiers with argument numbers
   1 up to highest argument; each argument may only be used once.
   A format string can have at most 30 arguments.  */
199 /* Formatting phases 1 and 2: render TEXT->format_spec plus
200 TEXT->args_ptr into a series of chunks in PP->buffer->args[].
201 Phase 3 is in pp_base_format_text. */
203 void
204 pp_base_format (pretty_printer *pp, text_info *text)
206 output_buffer *buffer = pp->buffer;
207 const char *p;
208 const char **args;
209 struct chunk_info *new_chunk_array;
211 unsigned int curarg = 0, chunk = 0, argno;
212 pp_wrapping_mode_t old_wrapping_mode;
213 bool any_unnumbered = false, any_numbered = false;
214 const char **formatters[PP_NL_ARGMAX];
216 /* Allocate a new chunk structure. */
217 new_chunk_array = XOBNEW (&buffer->chunk_obstack, struct chunk_info);
218 new_chunk_array->prev = buffer->cur_chunk_array;
219 buffer->cur_chunk_array = new_chunk_array;
220 args = new_chunk_array->args;
222 /* Formatting phase 1: split up TEXT->format_spec into chunks in
223 PP->buffer->args[]. Even-numbered chunks are to be output
224 verbatim, odd-numbered chunks are format specifiers.
225 %m, %%, %<, %>, and %' are replaced with the appropriate text at
226 this point. */
228 memset (formatters, 0, sizeof formatters);
230 for (p = text->format_spec; *p; )
232 while (*p != '\0' && *p != '%')
234 obstack_1grow (&buffer->chunk_obstack, *p);
235 p++;
238 if (*p == '\0')
239 break;
241 switch (*++p)
243 case '\0':
244 gcc_unreachable ();
246 case '%':
247 obstack_1grow (&buffer->chunk_obstack, '%');
248 p++;
249 continue;
251 case '<':
252 obstack_grow (&buffer->chunk_obstack,
253 open_quote, strlen (open_quote));
254 p++;
255 continue;
257 case '>':
258 case '\'':
259 obstack_grow (&buffer->chunk_obstack,
260 close_quote, strlen (close_quote));
261 p++;
262 continue;
264 case 'm':
266 const char *errstr = xstrerror (text->err_no);
267 obstack_grow (&buffer->chunk_obstack, errstr, strlen (errstr));
269 p++;
270 continue;
272 default:
273 /* Handled in phase 2. Terminate the plain chunk here. */
274 obstack_1grow (&buffer->chunk_obstack, '\0');
275 gcc_assert (chunk < PP_NL_ARGMAX * 2);
276 args[chunk++] = XOBFINISH (&buffer->chunk_obstack, const char *);
277 break;
280 if (ISDIGIT (*p))
282 char *end;
283 argno = strtoul (p, &end, 10) - 1;
284 p = end;
285 gcc_assert (*p == '$');
286 p++;
288 any_numbered = true;
289 gcc_assert (!any_unnumbered);
291 else
293 argno = curarg++;
294 any_unnumbered = true;
295 gcc_assert (!any_numbered);
297 gcc_assert (argno < PP_NL_ARGMAX);
298 gcc_assert (!formatters[argno]);
299 formatters[argno] = &args[chunk];
302 obstack_1grow (&buffer->chunk_obstack, *p);
303 p++;
305 while (strchr ("qwl+#", p[-1]));
307 if (p[-1] == '.')
309 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
310 (where M == N + 1). */
311 if (ISDIGIT (*p))
315 obstack_1grow (&buffer->chunk_obstack, *p);
316 p++;
318 while (ISDIGIT (p[-1]));
319 gcc_assert (p[-1] == 's');
321 else
323 gcc_assert (*p == '*');
324 obstack_1grow (&buffer->chunk_obstack, '*');
325 p++;
327 if (ISDIGIT (*p))
329 char *end;
330 unsigned int argno2 = strtoul (p, &end, 10) - 1;
331 p = end;
332 gcc_assert (argno2 == argno - 1);
333 gcc_assert (!any_unnumbered);
334 gcc_assert (*p == '$');
336 p++;
337 formatters[argno2] = formatters[argno];
339 else
341 gcc_assert (!any_numbered);
342 formatters[argno+1] = formatters[argno];
343 curarg++;
345 gcc_assert (*p == 's');
346 obstack_1grow (&buffer->chunk_obstack, 's');
347 p++;
350 if (*p == '\0')
351 break;
353 obstack_1grow (&buffer->chunk_obstack, '\0');
354 gcc_assert (chunk < PP_NL_ARGMAX * 2);
355 args[chunk++] = XOBFINISH (&buffer->chunk_obstack, const char *);
358 obstack_1grow (&buffer->chunk_obstack, '\0');
359 gcc_assert (chunk < PP_NL_ARGMAX * 2);
360 args[chunk++] = XOBFINISH (&buffer->chunk_obstack, const char *);
361 args[chunk] = 0;
363 /* Set output to the argument obstack, and switch line-wrapping and
364 prefixing off. */
365 buffer->obstack = &buffer->chunk_obstack;
366 old_wrapping_mode = pp_set_verbatim_wrapping (pp);
368 /* Second phase. Replace each formatter with the formatted text it
369 corresponds to. */
371 for (argno = 0; formatters[argno]; argno++)
373 int precision = 0;
374 bool wide = false;
375 bool plus = false;
376 bool hash = false;
377 bool quote = false;
379 /* We do not attempt to enforce any ordering on the modifier
380 characters. */
382 for (p = *formatters[argno];; p++)
384 switch (*p)
386 case 'q':
387 gcc_assert (!quote);
388 quote = true;
389 continue;
391 case '+':
392 gcc_assert (!plus);
393 plus = true;
394 continue;
396 case '#':
397 gcc_assert (!hash);
398 hash = true;
399 continue;
401 case 'w':
402 gcc_assert (!wide);
403 wide = true;
404 continue;
406 case 'l':
407 /* We don't support precision beyond that of "long long". */
408 gcc_assert (precision < 2);
409 precision++;
410 continue;
412 break;
415 gcc_assert (!wide || precision == 0);
417 if (quote)
418 pp_string (pp, open_quote);
420 switch (*p)
422 case 'c':
423 pp_character (pp, va_arg (*text->args_ptr, int));
424 break;
426 case 'd':
427 case 'i':
428 if (wide)
429 pp_wide_integer (pp, va_arg (*text->args_ptr, HOST_WIDE_INT));
430 else
431 pp_integer_with_precision
432 (pp, *text->args_ptr, precision, int, "d");
433 break;
435 case 'o':
436 if (wide)
437 pp_scalar (pp, "%" HOST_WIDE_INT_PRINT "o",
438 va_arg (*text->args_ptr, unsigned HOST_WIDE_INT));
439 else
440 pp_integer_with_precision
441 (pp, *text->args_ptr, precision, unsigned, "o");
442 break;
444 case 's':
445 pp_string (pp, va_arg (*text->args_ptr, const char *));
446 break;
448 case 'p':
449 pp_pointer (pp, va_arg (*text->args_ptr, void *));
450 break;
452 case 'u':
453 if (wide)
454 pp_scalar (pp, HOST_WIDE_INT_PRINT_UNSIGNED,
455 va_arg (*text->args_ptr, unsigned HOST_WIDE_INT));
456 else
457 pp_integer_with_precision
458 (pp, *text->args_ptr, precision, unsigned, "u");
459 break;
461 case 'x':
462 if (wide)
463 pp_scalar (pp, HOST_WIDE_INT_PRINT_HEX,
464 va_arg (*text->args_ptr, unsigned HOST_WIDE_INT));
465 else
466 pp_integer_with_precision
467 (pp, *text->args_ptr, precision, unsigned, "x");
468 break;
470 case '.':
472 int n;
473 const char *s;
475 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
476 (where M == N + 1). The format string should be verified
477 already from the first phase. */
478 p++;
479 if (ISDIGIT (*p))
481 char *end;
482 n = strtoul (p, &end, 10);
483 p = end;
484 gcc_assert (*p == 's');
486 else
488 gcc_assert (*p == '*');
489 p++;
490 gcc_assert (*p == 's');
491 n = va_arg (*text->args_ptr, int);
493 /* This consumes a second entry in the formatters array. */
494 gcc_assert (formatters[argno] == formatters[argno+1]);
495 argno++;
498 s = va_arg (*text->args_ptr, const char *);
499 pp_append_text (pp, s, s + n);
501 break;
503 default:
505 bool ok;
507 gcc_assert (pp_format_decoder (pp));
508 ok = pp_format_decoder (pp) (pp, text, p,
509 precision, wide, plus, hash);
510 gcc_assert (ok);
514 if (quote)
515 pp_string (pp, close_quote);
517 obstack_1grow (&buffer->chunk_obstack, '\0');
518 *formatters[argno] = XOBFINISH (&buffer->chunk_obstack, const char *);
521 #ifdef ENABLE_CHECKING
522 for (; argno < PP_NL_ARGMAX; argno++)
523 gcc_assert (!formatters[argno]);
524 #endif
526 /* Revert to normal obstack and wrapping mode. */
527 buffer->obstack = &buffer->formatted_obstack;
528 buffer->line_length = 0;
529 pp_wrapping_mode (pp) = old_wrapping_mode;
530 pp_clear_state (pp);
533 /* Format of a message pointed to by TEXT. */
534 void
535 pp_base_output_formatted_text (pretty_printer *pp)
537 unsigned int chunk;
538 output_buffer *buffer = pp_buffer (pp);
539 struct chunk_info *chunk_array = buffer->cur_chunk_array;
540 const char **args = chunk_array->args;
542 gcc_assert (buffer->obstack == &buffer->formatted_obstack);
543 gcc_assert (buffer->line_length == 0);
545 /* This is a third phase, first 2 phases done in pp_base_format_args.
546 Now we actually print it. */
547 for (chunk = 0; args[chunk]; chunk++)
548 pp_string (pp, args[chunk]);
550 /* Deallocate the chunk structure and everything after it (i.e. the
551 associated series of formatted strings). */
552 buffer->cur_chunk_array = chunk_array->prev;
553 obstack_free (&buffer->chunk_obstack, chunk_array);
556 /* Helper subroutine of output_verbatim and verbatim. Do the appropriate
557 settings needed by BUFFER for a verbatim formatting. */
558 void
559 pp_base_format_verbatim (pretty_printer *pp, text_info *text)
561 /* Set verbatim mode. */
562 pp_wrapping_mode_t oldmode = pp_set_verbatim_wrapping (pp);
564 /* Do the actual formatting. */
565 pp_format (pp, text);
566 pp_output_formatted_text (pp);
568 /* Restore previous settings. */
569 pp_wrapping_mode (pp) = oldmode;
572 /* Flush the content of BUFFER onto the attached stream. */
573 void
574 pp_base_flush (pretty_printer *pp)
576 pp_write_text_to_stream (pp);
577 pp_clear_state (pp);
578 fflush (pp->buffer->stream);
581 /* Sets the number of maximum characters per line PRETTY-PRINTER can
582 output in line-wrapping mode. A LENGTH value 0 suppresses
583 line-wrapping. */
584 void
585 pp_base_set_line_maximum_length (pretty_printer *pp, int length)
587 pp_line_cutoff (pp) = length;
588 pp_set_real_maximum_length (pp);
591 /* Clear PRETTY-PRINTER output area text info. */
592 void
593 pp_base_clear_output_area (pretty_printer *pp)
595 obstack_free (pp->buffer->obstack, obstack_base (pp->buffer->obstack));
596 pp->buffer->line_length = 0;
599 /* Set PREFIX for PRETTY-PRINTER. */
600 void
601 pp_base_set_prefix (pretty_printer *pp, const char *prefix)
603 pp->prefix = prefix;
604 pp_set_real_maximum_length (pp);
605 pp->emitted_prefix = false;
606 pp_indentation (pp) = 0;
609 /* Free PRETTY-PRINTER's prefix, a previously malloc()'d string. */
610 void
611 pp_base_destroy_prefix (pretty_printer *pp)
613 if (pp->prefix != NULL)
615 free (CONST_CAST (char *, pp->prefix));
616 pp->prefix = NULL;
620 /* Write out PRETTY-PRINTER's prefix. */
621 void
622 pp_base_emit_prefix (pretty_printer *pp)
624 if (pp->prefix != NULL)
626 switch (pp_prefixing_rule (pp))
628 default:
629 case DIAGNOSTICS_SHOW_PREFIX_NEVER:
630 break;
632 case DIAGNOSTICS_SHOW_PREFIX_ONCE:
633 if (pp->emitted_prefix)
635 pp_base_indent (pp);
636 break;
638 pp_indentation (pp) += 3;
639 /* Fall through. */
641 case DIAGNOSTICS_SHOW_PREFIX_EVERY_LINE:
643 int prefix_length = strlen (pp->prefix);
644 pp_append_r (pp, pp->prefix, prefix_length);
645 pp->emitted_prefix = true;
647 break;
652 /* Construct a PRETTY-PRINTER with PREFIX and of MAXIMUM_LENGTH
653 characters per line. */
654 void
655 pp_construct (pretty_printer *pp, const char *prefix, int maximum_length)
657 memset (pp, 0, sizeof (pretty_printer));
658 pp->buffer = XCNEW (output_buffer);
659 obstack_init (&pp->buffer->chunk_obstack);
660 obstack_init (&pp->buffer->formatted_obstack);
661 pp->buffer->obstack = &pp->buffer->formatted_obstack;
662 pp->buffer->stream = stderr;
663 pp_line_cutoff (pp) = maximum_length;
664 pp_prefixing_rule (pp) = DIAGNOSTICS_SHOW_PREFIX_ONCE;
665 pp_set_prefix (pp, prefix);
666 pp_translate_identifiers (pp) = true;
669 /* Append a string delimited by START and END to the output area of
670 PRETTY-PRINTER. No line wrapping is done. However, if beginning a
671 new line then emit PRETTY-PRINTER's prefix and skip any leading
672 whitespace if appropriate. The caller must ensure that it is
673 safe to do so. */
674 void
675 pp_base_append_text (pretty_printer *pp, const char *start, const char *end)
677 /* Emit prefix and skip whitespace if we're starting a new line. */
678 if (pp->buffer->line_length == 0)
680 pp_emit_prefix (pp);
681 if (pp_is_wrapping_line (pp))
682 while (start != end && *start == ' ')
683 ++start;
685 pp_append_r (pp, start, end - start);
688 /* Finishes constructing a NULL-terminated character string representing
689 the PRETTY-PRINTED text. */
690 const char *
691 pp_base_formatted_text (pretty_printer *pp)
693 obstack_1grow (pp->buffer->obstack, '\0');
694 return pp_formatted_text_data (pp);
697 /* Return a pointer to the last character emitted in PRETTY-PRINTER's
698 output area. A NULL pointer means no character available. */
699 const char *
700 pp_base_last_position_in_text (const pretty_printer *pp)
702 const char *p = NULL;
703 struct obstack *text = pp->buffer->obstack;
705 if (obstack_base (text) != obstack_next_free (text))
706 p = ((const char *) obstack_next_free (text)) - 1;
707 return p;
710 /* Return the amount of characters PRETTY-PRINTER can accept to
711 make a full line. Meaningful only in line-wrapping mode. */
713 pp_base_remaining_character_count_for_line (pretty_printer *pp)
715 return pp->maximum_length - pp->buffer->line_length;
719 /* Format a message into BUFFER a la printf. */
720 void
721 pp_printf (pretty_printer *pp, const char *msg, ...)
723 text_info text;
724 va_list ap;
726 va_start (ap, msg);
727 text.err_no = errno;
728 text.args_ptr = &ap;
729 text.format_spec = msg;
730 text.locus = NULL;
731 pp_format (pp, &text);
732 pp_output_formatted_text (pp);
733 va_end (ap);
737 /* Output MESSAGE verbatim into BUFFER. */
738 void
739 pp_verbatim (pretty_printer *pp, const char *msg, ...)
741 text_info text;
742 va_list ap;
744 va_start (ap, msg);
745 text.err_no = errno;
746 text.args_ptr = &ap;
747 text.format_spec = msg;
748 text.locus = NULL;
749 pp_format_verbatim (pp, &text);
750 va_end (ap);
755 /* Have PRETTY-PRINTER start a new line. */
756 void
757 pp_base_newline (pretty_printer *pp)
759 obstack_1grow (pp->buffer->obstack, '\n');
760 pp_needs_newline (pp) = false;
761 pp->buffer->line_length = 0;
764 /* Have PRETTY-PRINTER add a CHARACTER. */
765 void
766 pp_base_character (pretty_printer *pp, int c)
768 if (pp_is_wrapping_line (pp)
769 && pp_remaining_character_count_for_line (pp) <= 0)
771 pp_newline (pp);
772 if (ISSPACE (c))
773 return;
775 obstack_1grow (pp->buffer->obstack, c);
776 ++pp->buffer->line_length;
779 /* Append a STRING to the output area of PRETTY-PRINTER; the STRING may
780 be line-wrapped if in appropriate mode. */
781 void
782 pp_base_string (pretty_printer *pp, const char *str)
784 pp_maybe_wrap_text (pp, str, str + (str ? strlen (str) : 0));
787 /* Maybe print out a whitespace if needed. */
789 void
790 pp_base_maybe_space (pretty_printer *pp)
792 if (pp_base (pp)->padding != pp_none)
794 pp_space (pp);
795 pp_base (pp)->padding = pp_none;
/* The string starting at P has LEN (at least 1) bytes left; if they
   start with a valid UTF-8 sequence, return the length of that
   sequence and set *VALUE to the value of that sequence, and
   otherwise return 0 and set *VALUE to (unsigned int) -1.

   Sequences of two to six bytes are accepted.  Overlong encodings,
   surrogates (0xD800-0xDFFF), stray continuation bytes and sequences
   longer than LEN are rejected.  Calling with LEN == 0 aborts.  */

static int
decode_utf8_char (const unsigned char *p, size_t len, unsigned int *value)
{
  unsigned int t;

  /* Check LEN before reading *P, so an empty range is never touched.  */
  if (len == 0)
    abort ();

  t = *p;
  if (t & 0x80)
    {
      size_t utf8_len = 0;
      unsigned int ch;
      size_t i;

      /* The number of leading one bits in the first byte is the
         length of the sequence.  */
      for (t = *p; t & 0x80; t <<= 1)
        utf8_len++;

      if (utf8_len > len || utf8_len < 2 || utf8_len > 6)
        {
          *value = (unsigned int) -1;
          return 0;
        }

      /* Payload bits of the lead byte.  */
      ch = *p & ((1 << (7 - utf8_len)) - 1);
      for (i = 1; i < utf8_len; i++)
        {
          unsigned int u = p[i];
          /* Every following byte must look like 10xxxxxx.  */
          if ((u & 0xC0) != 0x80)
            {
              *value = (unsigned int) -1;
              return 0;
            }
          ch = (ch << 6) | (u & 0x3F);
        }

      /* Reject values that a shorter sequence could have encoded,
         and surrogates.  */
      if (   (ch <= 0x7F && utf8_len > 1)
          || (ch <= 0x7FF && utf8_len > 2)
          || (ch <= 0xFFFF && utf8_len > 3)
          || (ch <= 0x1FFFFF && utf8_len > 4)
          || (ch <= 0x3FFFFFF && utf8_len > 5)
          || (ch >= 0xD800 && ch <= 0xDFFF))
        {
          *value = (unsigned int) -1;
          return 0;
        }
      *value = ch;
      return (int) utf8_len;
    }
  else
    {
      *value = t;
      return 1;
    }
}
855 /* Allocator for identifier_to_locale and corresponding function to
856 free memory. */
858 void *(*identifier_to_locale_alloc) (size_t) = xmalloc;
859 void (*identifier_to_locale_free) (void *) = free;
861 /* Given IDENT, an identifier in the internal encoding, return a
862 version of IDENT suitable for diagnostics in the locale character
863 set: either IDENT itself, or a string, allocated using
864 identifier_to_locale_alloc, converted to the locale character set
865 and using escape sequences if not representable in the locale
866 character set or containing control characters or invalid byte
867 sequences. Existing backslashes in IDENT are not doubled, so the
868 result may not uniquely specify the contents of an arbitrary byte
869 sequence identifier. */
871 const char *
872 identifier_to_locale (const char *ident)
874 const unsigned char *uid = (const unsigned char *) ident;
875 size_t idlen = strlen (ident);
876 bool valid_printable_utf8 = true;
877 bool all_ascii = true;
878 size_t i;
880 for (i = 0; i < idlen;)
882 unsigned int c;
883 size_t utf8_len = decode_utf8_char (&uid[i], idlen - i, &c);
884 if (utf8_len == 0 || c <= 0x1F || (c >= 0x7F && c <= 0x9F))
886 valid_printable_utf8 = false;
887 break;
889 if (utf8_len > 1)
890 all_ascii = false;
891 i += utf8_len;
894 /* If IDENT contains invalid UTF-8 sequences (which may occur with
895 attributes putting arbitrary byte sequences in identifiers), or
896 control characters, we use octal escape sequences for all bytes
897 outside printable ASCII. */
898 if (!valid_printable_utf8)
900 char *ret = (char *) identifier_to_locale_alloc (4 * idlen + 1);
901 char *p = ret;
902 for (i = 0; i < idlen; i++)
904 if (uid[i] > 0x1F && uid[i] < 0x7F)
905 *p++ = uid[i];
906 else
908 sprintf (p, "\\%03o", uid[i]);
909 p += 4;
912 *p = 0;
913 return ret;
916 /* Otherwise, if it is valid printable ASCII, or printable UTF-8
917 with the locale character set being UTF-8, IDENT is used. */
918 if (all_ascii || locale_utf8)
919 return ident;
921 /* Otherwise IDENT is converted to the locale character set if
922 possible. */
923 #if defined ENABLE_NLS && defined HAVE_LANGINFO_CODESET && HAVE_ICONV
924 if (locale_encoding != NULL)
926 iconv_t cd = iconv_open (locale_encoding, "UTF-8");
927 bool conversion_ok = true;
928 char *ret = NULL;
929 if (cd != (iconv_t) -1)
931 size_t ret_alloc = 4 * idlen + 1;
932 for (;;)
934 /* Repeat the whole conversion process as needed with
935 larger buffers so non-reversible transformations can
936 always be detected. */
937 ICONV_CONST char *inbuf = CONST_CAST (char *, ident);
938 char *outbuf;
939 size_t inbytesleft = idlen;
940 size_t outbytesleft = ret_alloc - 1;
941 size_t iconv_ret;
943 ret = (char *) identifier_to_locale_alloc (ret_alloc);
944 outbuf = ret;
946 if (iconv (cd, 0, 0, 0, 0) == (size_t) -1)
948 conversion_ok = false;
949 break;
952 iconv_ret = iconv (cd, &inbuf, &inbytesleft,
953 &outbuf, &outbytesleft);
954 if (iconv_ret == (size_t) -1 || inbytesleft != 0)
956 if (errno == E2BIG)
958 ret_alloc *= 2;
959 identifier_to_locale_free (ret);
960 ret = NULL;
961 continue;
963 else
965 conversion_ok = false;
966 break;
969 else if (iconv_ret != 0)
971 conversion_ok = false;
972 break;
974 /* Return to initial shift state. */
975 if (iconv (cd, 0, 0, &outbuf, &outbytesleft) == (size_t) -1)
977 if (errno == E2BIG)
979 ret_alloc *= 2;
980 identifier_to_locale_free (ret);
981 ret = NULL;
982 continue;
984 else
986 conversion_ok = false;
987 break;
990 *outbuf = 0;
991 break;
993 iconv_close (cd);
994 if (conversion_ok)
995 return ret;
998 #endif
1000 /* Otherwise, convert non-ASCII characters in IDENT to UCNs. */
1002 char *ret = (char *) identifier_to_locale_alloc (10 * idlen + 1);
1003 char *p = ret;
1004 for (i = 0; i < idlen;)
1006 unsigned int c;
1007 size_t utf8_len = decode_utf8_char (&uid[i], idlen - i, &c);
1008 if (utf8_len == 1)
1009 *p++ = uid[i];
1010 else
1012 sprintf (p, "\\U%08x", c);
1013 p += 10;
1015 i += utf8_len;
1017 *p = 0;
1018 return ret;