Mark ChangeLog
[official-gcc.git] / gcc / pretty-print.c
blob4c45e51ec7f3727674427b18e2741faa7c4a46d1
/* Various declarations for language-independent pretty-print subroutines.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Gabriel Dos Reis <gdr@integrable-solutions.net>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "intl.h"
25 #include "pretty-print.h"
27 #if HAVE_ICONV
28 #include <iconv.h>
29 #endif
/* A pointer to the formatted diagnostic message: the base address of
   the obstack PP is currently writing to, viewed as a C string.  */
#define pp_formatted_text_data(PP) \
   ((const char *) obstack_base (pp_base (PP)->buffer->obstack))
/* Format an integer given by va_arg (ARG, type-specifier T) where
   type-specifier is a precision modifier as indicated by PREC.  F is
   a string used to construct the appropriate format-specifier.

   PREC is 0 for T, 1 for long T and 2 for long long T; any other value
   formats nothing and consumes no argument.  */
#define pp_integer_with_precision(PP, ARG, PREC, T, F)       \
  do                                                         \
    switch (PREC)                                            \
      {                                                      \
      case 0:                                                \
        pp_scalar (PP, "%" F, va_arg (ARG, T));              \
        break;                                               \
                                                             \
      case 1:                                                \
        pp_scalar (PP, "%l" F, va_arg (ARG, long T));        \
        break;                                               \
                                                             \
      case 2:                                                \
        pp_scalar (PP, "%" HOST_LONG_LONG_FORMAT F, va_arg (ARG, long long T));  \
        break;                                               \
                                                             \
      default:                                               \
        break;                                               \
      }                                                      \
  while (0)
60 /* Subroutine of pp_set_maximum_length. Set up PRETTY-PRINTER's
61 internal maximum characters per line. */
62 static void
63 pp_set_real_maximum_length (pretty_printer *pp)
65 /* If we're told not to wrap lines then do the obvious thing. In case
66 we'll emit prefix only once per message, it is appropriate
67 not to increase unnecessarily the line-length cut-off. */
68 if (!pp_is_wrapping_line (pp)
69 || pp_prefixing_rule (pp) == DIAGNOSTICS_SHOW_PREFIX_ONCE
70 || pp_prefixing_rule (pp) == DIAGNOSTICS_SHOW_PREFIX_NEVER)
71 pp->maximum_length = pp_line_cutoff (pp);
72 else
74 int prefix_length = pp->prefix ? strlen (pp->prefix) : 0;
75 /* If the prefix is ridiculously too long, output at least
76 32 characters. */
77 if (pp_line_cutoff (pp) - prefix_length < 32)
78 pp->maximum_length = pp_line_cutoff (pp) + 32;
79 else
80 pp->maximum_length = pp_line_cutoff (pp);
84 /* Clear PRETTY-PRINTER's output state. */
85 static inline void
86 pp_clear_state (pretty_printer *pp)
88 pp->emitted_prefix = false;
89 pp_indentation (pp) = 0;
92 /* Flush the formatted text of PRETTY-PRINTER onto the attached stream. */
93 void
94 pp_write_text_to_stream (pretty_printer *pp)
96 const char *text = pp_formatted_text (pp);
97 fputs (text, pp->buffer->stream);
98 pp_clear_output_area (pp);
101 /* As pp_write_text_to_stream, but for GraphViz label output.
103 Flush the formatted text of pretty-printer PP onto the attached stream.
104 Replace characters in PPF that have special meaning in a GraphViz .dot
105 file.
107 This routine is not very fast, but it doesn't have to be as this is only
108 be used by routines dumping intermediate representations in graph form. */
110 void
111 pp_write_text_as_dot_label_to_stream (pretty_printer *pp, bool for_record)
113 const char *text = pp_formatted_text (pp);
114 const char *p = text;
115 FILE *fp = pp->buffer->stream;
117 while (*p)
119 switch (*p)
121 /* Print newlines as a left-aligned newline. */
122 case '\n':
123 fputs ("\\l\\\n", fp);
124 break;
126 /* A pipe is only special for record-shape nodes. */
127 case '|':
128 if (for_record)
129 fputc ('\\', fp);
130 fputc (*p, fp);
131 break;
133 /* The following characters always have to be escaped
134 for use in labels. */
135 case '{':
136 case '}':
137 case '<':
138 case '>':
139 case '"':
140 case ' ':
141 fputc ('\\', fp);
142 /* fall through */
143 default:
144 fputc (*p, fp);
145 break;
147 p++;
150 pp_clear_output_area (pp);
153 /* Wrap a text delimited by START and END into PRETTY-PRINTER. */
154 static void
155 pp_wrap_text (pretty_printer *pp, const char *start, const char *end)
157 bool wrapping_line = pp_is_wrapping_line (pp);
159 while (start != end)
161 /* Dump anything bordered by whitespaces. */
163 const char *p = start;
164 while (p != end && !ISBLANK (*p) && *p != '\n')
165 ++p;
166 if (wrapping_line
167 && p - start >= pp_remaining_character_count_for_line (pp))
168 pp_newline (pp);
169 pp_append_text (pp, start, p);
170 start = p;
173 if (start != end && ISBLANK (*start))
175 pp_space (pp);
176 ++start;
178 if (start != end && *start == '\n')
180 pp_newline (pp);
181 ++start;
186 /* Same as pp_wrap_text but wrap text only when in line-wrapping mode. */
187 static inline void
188 pp_maybe_wrap_text (pretty_printer *pp, const char *start, const char *end)
190 if (pp_is_wrapping_line (pp))
191 pp_wrap_text (pp, start, end);
192 else
193 pp_append_text (pp, start, end);
196 /* Append to the output area of PRETTY-PRINTER a string specified by its
197 STARTing character and LENGTH. */
198 static inline void
199 pp_append_r (pretty_printer *pp, const char *start, int length)
201 obstack_grow (pp->buffer->obstack, start, length);
202 pp->buffer->line_length += length;
205 /* Insert enough spaces into the output area of PRETTY-PRINTER to bring
206 the column position to the current indentation level, assuming that a
207 newline has just been written to the buffer. */
208 void
209 pp_base_indent (pretty_printer *pp)
211 int n = pp_indentation (pp);
212 int i;
214 for (i = 0; i < n; ++i)
215 pp_space (pp);
218 /* The following format specifiers are recognized as being client independent:
219 %d, %i: (signed) integer in base ten.
220 %u: unsigned integer in base ten.
221 %o: unsigned integer in base eight.
222 %x: unsigned integer in base sixteen.
223 %ld, %li, %lo, %lu, %lx: long versions of the above.
224 %lld, %lli, %llo, %llu, %llx: long long versions.
225 %wd, %wi, %wo, %wu, %wx: HOST_WIDE_INT versions.
226 %c: character.
227 %s: string.
228 %p: pointer.
229 %m: strerror(text->err_no) - does not consume a value from args_ptr.
230 %%: '%'.
231 %<: opening quote.
232 %>: closing quote.
233 %': apostrophe (should only be used in untranslated messages;
234 translations should use appropriate punctuation directly).
235 %.*s: a substring the length of which is specified by an argument
236 integer.
237 %Ns: likewise, but length specified as constant in the format string.
238 Flag 'q': quote formatted text (must come immediately after '%').
240 Arguments can be used sequentially, or through %N$ resp. *N$
241 notation Nth argument after the format string. If %N$ / *N$
242 notation is used, it must be used for all arguments, except %m, %%,
243 %<, %> and %', which may not have a number, as they do not consume
244 an argument. When %M$.*N$s is used, M must be N + 1. (This may
245 also be written %M$.*s, provided N is not otherwise used.) The
246 format string must have conversion specifiers with argument numbers
247 1 up to highest argument; each argument may only be used once.
248 A format string can have at most 30 arguments. */
250 /* Formatting phases 1 and 2: render TEXT->format_spec plus
251 TEXT->args_ptr into a series of chunks in PP->buffer->args[].
252 Phase 3 is in pp_base_format_text. */
254 void
255 pp_base_format (pretty_printer *pp, text_info *text)
257 output_buffer *buffer = pp->buffer;
258 const char *p;
259 const char **args;
260 struct chunk_info *new_chunk_array;
262 unsigned int curarg = 0, chunk = 0, argno;
263 pp_wrapping_mode_t old_wrapping_mode;
264 bool any_unnumbered = false, any_numbered = false;
265 const char **formatters[PP_NL_ARGMAX];
267 /* Allocate a new chunk structure. */
268 new_chunk_array = XOBNEW (&buffer->chunk_obstack, struct chunk_info);
269 new_chunk_array->prev = buffer->cur_chunk_array;
270 buffer->cur_chunk_array = new_chunk_array;
271 args = new_chunk_array->args;
273 /* Formatting phase 1: split up TEXT->format_spec into chunks in
274 PP->buffer->args[]. Even-numbered chunks are to be output
275 verbatim, odd-numbered chunks are format specifiers.
276 %m, %%, %<, %>, and %' are replaced with the appropriate text at
277 this point. */
279 memset (formatters, 0, sizeof formatters);
281 for (p = text->format_spec; *p; )
283 while (*p != '\0' && *p != '%')
285 obstack_1grow (&buffer->chunk_obstack, *p);
286 p++;
289 if (*p == '\0')
290 break;
292 switch (*++p)
294 case '\0':
295 gcc_unreachable ();
297 case '%':
298 obstack_1grow (&buffer->chunk_obstack, '%');
299 p++;
300 continue;
302 case '<':
303 obstack_grow (&buffer->chunk_obstack,
304 open_quote, strlen (open_quote));
305 p++;
306 continue;
308 case '>':
309 case '\'':
310 obstack_grow (&buffer->chunk_obstack,
311 close_quote, strlen (close_quote));
312 p++;
313 continue;
315 case 'm':
317 const char *errstr = xstrerror (text->err_no);
318 obstack_grow (&buffer->chunk_obstack, errstr, strlen (errstr));
320 p++;
321 continue;
323 default:
324 /* Handled in phase 2. Terminate the plain chunk here. */
325 obstack_1grow (&buffer->chunk_obstack, '\0');
326 gcc_assert (chunk < PP_NL_ARGMAX * 2);
327 args[chunk++] = XOBFINISH (&buffer->chunk_obstack, const char *);
328 break;
331 if (ISDIGIT (*p))
333 char *end;
334 argno = strtoul (p, &end, 10) - 1;
335 p = end;
336 gcc_assert (*p == '$');
337 p++;
339 any_numbered = true;
340 gcc_assert (!any_unnumbered);
342 else
344 argno = curarg++;
345 any_unnumbered = true;
346 gcc_assert (!any_numbered);
348 gcc_assert (argno < PP_NL_ARGMAX);
349 gcc_assert (!formatters[argno]);
350 formatters[argno] = &args[chunk];
353 obstack_1grow (&buffer->chunk_obstack, *p);
354 p++;
356 while (strchr ("qwl+#", p[-1]));
358 if (p[-1] == '.')
360 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
361 (where M == N + 1). */
362 if (ISDIGIT (*p))
366 obstack_1grow (&buffer->chunk_obstack, *p);
367 p++;
369 while (ISDIGIT (p[-1]));
370 gcc_assert (p[-1] == 's');
372 else
374 gcc_assert (*p == '*');
375 obstack_1grow (&buffer->chunk_obstack, '*');
376 p++;
378 if (ISDIGIT (*p))
380 char *end;
381 unsigned int argno2 = strtoul (p, &end, 10) - 1;
382 p = end;
383 gcc_assert (argno2 == argno - 1);
384 gcc_assert (!any_unnumbered);
385 gcc_assert (*p == '$');
387 p++;
388 formatters[argno2] = formatters[argno];
390 else
392 gcc_assert (!any_numbered);
393 formatters[argno+1] = formatters[argno];
394 curarg++;
396 gcc_assert (*p == 's');
397 obstack_1grow (&buffer->chunk_obstack, 's');
398 p++;
401 if (*p == '\0')
402 break;
404 obstack_1grow (&buffer->chunk_obstack, '\0');
405 gcc_assert (chunk < PP_NL_ARGMAX * 2);
406 args[chunk++] = XOBFINISH (&buffer->chunk_obstack, const char *);
409 obstack_1grow (&buffer->chunk_obstack, '\0');
410 gcc_assert (chunk < PP_NL_ARGMAX * 2);
411 args[chunk++] = XOBFINISH (&buffer->chunk_obstack, const char *);
412 args[chunk] = 0;
414 /* Set output to the argument obstack, and switch line-wrapping and
415 prefixing off. */
416 buffer->obstack = &buffer->chunk_obstack;
417 old_wrapping_mode = pp_set_verbatim_wrapping (pp);
419 /* Second phase. Replace each formatter with the formatted text it
420 corresponds to. */
422 for (argno = 0; formatters[argno]; argno++)
424 int precision = 0;
425 bool wide = false;
426 bool plus = false;
427 bool hash = false;
428 bool quote = false;
430 /* We do not attempt to enforce any ordering on the modifier
431 characters. */
433 for (p = *formatters[argno];; p++)
435 switch (*p)
437 case 'q':
438 gcc_assert (!quote);
439 quote = true;
440 continue;
442 case '+':
443 gcc_assert (!plus);
444 plus = true;
445 continue;
447 case '#':
448 gcc_assert (!hash);
449 hash = true;
450 continue;
452 case 'w':
453 gcc_assert (!wide);
454 wide = true;
455 continue;
457 case 'l':
458 /* We don't support precision beyond that of "long long". */
459 gcc_assert (precision < 2);
460 precision++;
461 continue;
463 break;
466 gcc_assert (!wide || precision == 0);
468 if (quote)
469 pp_string (pp, open_quote);
471 switch (*p)
473 case 'c':
474 pp_character (pp, va_arg (*text->args_ptr, int));
475 break;
477 case 'd':
478 case 'i':
479 if (wide)
480 pp_wide_integer (pp, va_arg (*text->args_ptr, HOST_WIDE_INT));
481 else
482 pp_integer_with_precision
483 (pp, *text->args_ptr, precision, int, "d");
484 break;
486 case 'o':
487 if (wide)
488 pp_scalar (pp, "%" HOST_WIDE_INT_PRINT "o",
489 va_arg (*text->args_ptr, unsigned HOST_WIDE_INT));
490 else
491 pp_integer_with_precision
492 (pp, *text->args_ptr, precision, unsigned, "o");
493 break;
495 case 's':
496 pp_string (pp, va_arg (*text->args_ptr, const char *));
497 break;
499 case 'p':
500 pp_pointer (pp, va_arg (*text->args_ptr, void *));
501 break;
503 case 'u':
504 if (wide)
505 pp_scalar (pp, HOST_WIDE_INT_PRINT_UNSIGNED,
506 va_arg (*text->args_ptr, unsigned HOST_WIDE_INT));
507 else
508 pp_integer_with_precision
509 (pp, *text->args_ptr, precision, unsigned, "u");
510 break;
512 case 'x':
513 if (wide)
514 pp_scalar (pp, HOST_WIDE_INT_PRINT_HEX,
515 va_arg (*text->args_ptr, unsigned HOST_WIDE_INT));
516 else
517 pp_integer_with_precision
518 (pp, *text->args_ptr, precision, unsigned, "x");
519 break;
521 case '.':
523 int n;
524 const char *s;
526 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
527 (where M == N + 1). The format string should be verified
528 already from the first phase. */
529 p++;
530 if (ISDIGIT (*p))
532 char *end;
533 n = strtoul (p, &end, 10);
534 p = end;
535 gcc_assert (*p == 's');
537 else
539 gcc_assert (*p == '*');
540 p++;
541 gcc_assert (*p == 's');
542 n = va_arg (*text->args_ptr, int);
544 /* This consumes a second entry in the formatters array. */
545 gcc_assert (formatters[argno] == formatters[argno+1]);
546 argno++;
549 s = va_arg (*text->args_ptr, const char *);
550 pp_append_text (pp, s, s + n);
552 break;
554 default:
556 bool ok;
558 gcc_assert (pp_format_decoder (pp));
559 ok = pp_format_decoder (pp) (pp, text, p,
560 precision, wide, plus, hash);
561 gcc_assert (ok);
565 if (quote)
566 pp_string (pp, close_quote);
568 obstack_1grow (&buffer->chunk_obstack, '\0');
569 *formatters[argno] = XOBFINISH (&buffer->chunk_obstack, const char *);
572 #ifdef ENABLE_CHECKING
573 for (; argno < PP_NL_ARGMAX; argno++)
574 gcc_assert (!formatters[argno]);
575 #endif
577 /* Revert to normal obstack and wrapping mode. */
578 buffer->obstack = &buffer->formatted_obstack;
579 buffer->line_length = 0;
580 pp_wrapping_mode (pp) = old_wrapping_mode;
581 pp_clear_state (pp);
584 /* Format of a message pointed to by TEXT. */
585 void
586 pp_base_output_formatted_text (pretty_printer *pp)
588 unsigned int chunk;
589 output_buffer *buffer = pp_buffer (pp);
590 struct chunk_info *chunk_array = buffer->cur_chunk_array;
591 const char **args = chunk_array->args;
593 gcc_assert (buffer->obstack == &buffer->formatted_obstack);
594 gcc_assert (buffer->line_length == 0);
596 /* This is a third phase, first 2 phases done in pp_base_format_args.
597 Now we actually print it. */
598 for (chunk = 0; args[chunk]; chunk++)
599 pp_string (pp, args[chunk]);
601 /* Deallocate the chunk structure and everything after it (i.e. the
602 associated series of formatted strings). */
603 buffer->cur_chunk_array = chunk_array->prev;
604 obstack_free (&buffer->chunk_obstack, chunk_array);
607 /* Helper subroutine of output_verbatim and verbatim. Do the appropriate
608 settings needed by BUFFER for a verbatim formatting. */
609 void
610 pp_base_format_verbatim (pretty_printer *pp, text_info *text)
612 /* Set verbatim mode. */
613 pp_wrapping_mode_t oldmode = pp_set_verbatim_wrapping (pp);
615 /* Do the actual formatting. */
616 pp_format (pp, text);
617 pp_output_formatted_text (pp);
619 /* Restore previous settings. */
620 pp_wrapping_mode (pp) = oldmode;
623 /* Flush the content of BUFFER onto the attached stream. */
624 void
625 pp_base_flush (pretty_printer *pp)
627 pp_write_text_to_stream (pp);
628 pp_clear_state (pp);
629 fflush (pp->buffer->stream);
632 /* Sets the number of maximum characters per line PRETTY-PRINTER can
633 output in line-wrapping mode. A LENGTH value 0 suppresses
634 line-wrapping. */
635 void
636 pp_base_set_line_maximum_length (pretty_printer *pp, int length)
638 pp_line_cutoff (pp) = length;
639 pp_set_real_maximum_length (pp);
642 /* Clear PRETTY-PRINTER output area text info. */
643 void
644 pp_base_clear_output_area (pretty_printer *pp)
646 obstack_free (pp->buffer->obstack, obstack_base (pp->buffer->obstack));
647 pp->buffer->line_length = 0;
650 /* Set PREFIX for PRETTY-PRINTER. */
651 void
652 pp_base_set_prefix (pretty_printer *pp, const char *prefix)
654 pp->prefix = prefix;
655 pp_set_real_maximum_length (pp);
656 pp->emitted_prefix = false;
657 pp_indentation (pp) = 0;
660 /* Free PRETTY-PRINTER's prefix, a previously malloc()'d string. */
661 void
662 pp_base_destroy_prefix (pretty_printer *pp)
664 if (pp->prefix != NULL)
666 free (CONST_CAST (char *, pp->prefix));
667 pp->prefix = NULL;
671 /* Write out PRETTY-PRINTER's prefix. */
672 void
673 pp_base_emit_prefix (pretty_printer *pp)
675 if (pp->prefix != NULL)
677 switch (pp_prefixing_rule (pp))
679 default:
680 case DIAGNOSTICS_SHOW_PREFIX_NEVER:
681 break;
683 case DIAGNOSTICS_SHOW_PREFIX_ONCE:
684 if (pp->emitted_prefix)
686 pp_base_indent (pp);
687 break;
689 pp_indentation (pp) += 3;
690 /* Fall through. */
692 case DIAGNOSTICS_SHOW_PREFIX_EVERY_LINE:
694 int prefix_length = strlen (pp->prefix);
695 pp_append_r (pp, pp->prefix, prefix_length);
696 pp->emitted_prefix = true;
698 break;
703 /* Construct a PRETTY-PRINTER with PREFIX and of MAXIMUM_LENGTH
704 characters per line. */
705 void
706 pp_construct (pretty_printer *pp, const char *prefix, int maximum_length)
708 memset (pp, 0, sizeof (pretty_printer));
709 pp->buffer = XCNEW (output_buffer);
710 obstack_init (&pp->buffer->chunk_obstack);
711 obstack_init (&pp->buffer->formatted_obstack);
712 pp->buffer->obstack = &pp->buffer->formatted_obstack;
713 pp->buffer->stream = stderr;
714 pp_line_cutoff (pp) = maximum_length;
715 pp_prefixing_rule (pp) = DIAGNOSTICS_SHOW_PREFIX_ONCE;
716 pp_set_prefix (pp, prefix);
717 pp_translate_identifiers (pp) = true;
720 /* Append a string delimited by START and END to the output area of
721 PRETTY-PRINTER. No line wrapping is done. However, if beginning a
722 new line then emit PRETTY-PRINTER's prefix and skip any leading
723 whitespace if appropriate. The caller must ensure that it is
724 safe to do so. */
725 void
726 pp_base_append_text (pretty_printer *pp, const char *start, const char *end)
728 /* Emit prefix and skip whitespace if we're starting a new line. */
729 if (pp->buffer->line_length == 0)
731 pp_emit_prefix (pp);
732 if (pp_is_wrapping_line (pp))
733 while (start != end && *start == ' ')
734 ++start;
736 pp_append_r (pp, start, end - start);
739 /* Finishes constructing a NULL-terminated character string representing
740 the PRETTY-PRINTED text. */
741 const char *
742 pp_base_formatted_text (pretty_printer *pp)
744 obstack_1grow (pp->buffer->obstack, '\0');
745 return pp_formatted_text_data (pp);
748 /* Return a pointer to the last character emitted in PRETTY-PRINTER's
749 output area. A NULL pointer means no character available. */
750 const char *
751 pp_base_last_position_in_text (const pretty_printer *pp)
753 const char *p = NULL;
754 struct obstack *text = pp->buffer->obstack;
756 if (obstack_base (text) != obstack_next_free (text))
757 p = ((const char *) obstack_next_free (text)) - 1;
758 return p;
761 /* Return the amount of characters PRETTY-PRINTER can accept to
762 make a full line. Meaningful only in line-wrapping mode. */
764 pp_base_remaining_character_count_for_line (pretty_printer *pp)
766 return pp->maximum_length - pp->buffer->line_length;
770 /* Format a message into BUFFER a la printf. */
771 void
772 pp_printf (pretty_printer *pp, const char *msg, ...)
774 text_info text;
775 va_list ap;
777 va_start (ap, msg);
778 text.err_no = errno;
779 text.args_ptr = &ap;
780 text.format_spec = msg;
781 text.locus = NULL;
782 pp_format (pp, &text);
783 pp_output_formatted_text (pp);
784 va_end (ap);
788 /* Output MESSAGE verbatim into BUFFER. */
789 void
790 pp_verbatim (pretty_printer *pp, const char *msg, ...)
792 text_info text;
793 va_list ap;
795 va_start (ap, msg);
796 text.err_no = errno;
797 text.args_ptr = &ap;
798 text.format_spec = msg;
799 text.locus = NULL;
800 pp_format_verbatim (pp, &text);
801 va_end (ap);
806 /* Have PRETTY-PRINTER start a new line. */
807 void
808 pp_base_newline (pretty_printer *pp)
810 obstack_1grow (pp->buffer->obstack, '\n');
811 pp_needs_newline (pp) = false;
812 pp->buffer->line_length = 0;
815 /* Have PRETTY-PRINTER add a CHARACTER. */
816 void
817 pp_base_character (pretty_printer *pp, int c)
819 if (pp_is_wrapping_line (pp)
820 && pp_remaining_character_count_for_line (pp) <= 0)
822 pp_newline (pp);
823 if (ISSPACE (c))
824 return;
826 obstack_1grow (pp->buffer->obstack, c);
827 ++pp->buffer->line_length;
830 /* Append a STRING to the output area of PRETTY-PRINTER; the STRING may
831 be line-wrapped if in appropriate mode. */
832 void
833 pp_base_string (pretty_printer *pp, const char *str)
835 pp_maybe_wrap_text (pp, str, str + (str ? strlen (str) : 0));
838 /* Maybe print out a whitespace if needed. */
840 void
841 pp_base_maybe_space (pretty_printer *pp)
843 if (pp_base (pp)->padding != pp_none)
845 pp_space (pp);
846 pp_base (pp)->padding = pp_none;
/* The string starting at P has LEN (at least 1) bytes left; if they
   start with a valid UTF-8 sequence, return the length of that
   sequence and set *VALUE to the value of that sequence, and
   otherwise return 0 and set *VALUE to (unsigned int) -1.

   Sequences of up to six bytes are accepted.  Truncated sequences,
   bad continuation bytes, overlong encodings and the surrogate range
   0xD800-0xDFFF are rejected.  Aborts if LEN is 0.  */

static int
decode_utf8_char (const unsigned char *p, size_t len, unsigned int *value)
{
  unsigned int t;

  if (len == 0)
    abort ();

  /* Only read the lead byte once LEN is known to be non-zero.  */
  t = *p;
  if (t & 0x80)
    {
      size_t utf8_len = 0;
      unsigned int ch;
      size_t i;

      /* The number of leading one bits in the lead byte gives the
         sequence length.  */
      for (t = *p; t & 0x80; t <<= 1)
        utf8_len++;

      if (utf8_len > len || utf8_len < 2 || utf8_len > 6)
        {
          *value = (unsigned int) -1;
          return 0;
        }

      /* Payload bits of the lead byte, then six bits per continuation
         byte, each of which must look like 10xxxxxx.  */
      ch = *p & ((1 << (7 - utf8_len)) - 1);
      for (i = 1; i < utf8_len; i++)
        {
          unsigned int u = p[i];
          if ((u & 0xC0) != 0x80)
            {
              *value = (unsigned int) -1;
              return 0;
            }
          ch = (ch << 6) | (u & 0x3F);
        }

      /* Reject overlong encodings and surrogates.  */
      if (   (ch <=      0x7F && utf8_len > 1)
          || (ch <=     0x7FF && utf8_len > 2)
          || (ch <=    0xFFFF && utf8_len > 3)
          || (ch <=  0x1FFFFF && utf8_len > 4)
          || (ch <= 0x3FFFFFF && utf8_len > 5)
          || (ch >= 0xD800 && ch <= 0xDFFF))
        {
          *value = (unsigned int) -1;
          return 0;
        }
      *value = ch;
      return utf8_len;
    }
  else
    {
      /* Plain ASCII byte.  */
      *value = t;
      return 1;
    }
}
906 /* Allocator for identifier_to_locale and corresponding function to
907 free memory. */
909 void *(*identifier_to_locale_alloc) (size_t) = xmalloc;
910 void (*identifier_to_locale_free) (void *) = free;
912 /* Given IDENT, an identifier in the internal encoding, return a
913 version of IDENT suitable for diagnostics in the locale character
914 set: either IDENT itself, or a string, allocated using
915 identifier_to_locale_alloc, converted to the locale character set
916 and using escape sequences if not representable in the locale
917 character set or containing control characters or invalid byte
918 sequences. Existing backslashes in IDENT are not doubled, so the
919 result may not uniquely specify the contents of an arbitrary byte
920 sequence identifier. */
922 const char *
923 identifier_to_locale (const char *ident)
925 const unsigned char *uid = (const unsigned char *) ident;
926 size_t idlen = strlen (ident);
927 bool valid_printable_utf8 = true;
928 bool all_ascii = true;
929 size_t i;
931 for (i = 0; i < idlen;)
933 unsigned int c;
934 size_t utf8_len = decode_utf8_char (&uid[i], idlen - i, &c);
935 if (utf8_len == 0 || c <= 0x1F || (c >= 0x7F && c <= 0x9F))
937 valid_printable_utf8 = false;
938 break;
940 if (utf8_len > 1)
941 all_ascii = false;
942 i += utf8_len;
945 /* If IDENT contains invalid UTF-8 sequences (which may occur with
946 attributes putting arbitrary byte sequences in identifiers), or
947 control characters, we use octal escape sequences for all bytes
948 outside printable ASCII. */
949 if (!valid_printable_utf8)
951 char *ret = (char *) identifier_to_locale_alloc (4 * idlen + 1);
952 char *p = ret;
953 for (i = 0; i < idlen; i++)
955 if (uid[i] > 0x1F && uid[i] < 0x7F)
956 *p++ = uid[i];
957 else
959 sprintf (p, "\\%03o", uid[i]);
960 p += 4;
963 *p = 0;
964 return ret;
967 /* Otherwise, if it is valid printable ASCII, or printable UTF-8
968 with the locale character set being UTF-8, IDENT is used. */
969 if (all_ascii || locale_utf8)
970 return ident;
972 /* Otherwise IDENT is converted to the locale character set if
973 possible. */
974 #if defined ENABLE_NLS && defined HAVE_LANGINFO_CODESET && HAVE_ICONV
975 if (locale_encoding != NULL)
977 iconv_t cd = iconv_open (locale_encoding, "UTF-8");
978 bool conversion_ok = true;
979 char *ret = NULL;
980 if (cd != (iconv_t) -1)
982 size_t ret_alloc = 4 * idlen + 1;
983 for (;;)
985 /* Repeat the whole conversion process as needed with
986 larger buffers so non-reversible transformations can
987 always be detected. */
988 ICONV_CONST char *inbuf = CONST_CAST (char *, ident);
989 char *outbuf;
990 size_t inbytesleft = idlen;
991 size_t outbytesleft = ret_alloc - 1;
992 size_t iconv_ret;
994 ret = (char *) identifier_to_locale_alloc (ret_alloc);
995 outbuf = ret;
997 if (iconv (cd, 0, 0, 0, 0) == (size_t) -1)
999 conversion_ok = false;
1000 break;
1003 iconv_ret = iconv (cd, &inbuf, &inbytesleft,
1004 &outbuf, &outbytesleft);
1005 if (iconv_ret == (size_t) -1 || inbytesleft != 0)
1007 if (errno == E2BIG)
1009 ret_alloc *= 2;
1010 identifier_to_locale_free (ret);
1011 ret = NULL;
1012 continue;
1014 else
1016 conversion_ok = false;
1017 break;
1020 else if (iconv_ret != 0)
1022 conversion_ok = false;
1023 break;
1025 /* Return to initial shift state. */
1026 if (iconv (cd, 0, 0, &outbuf, &outbytesleft) == (size_t) -1)
1028 if (errno == E2BIG)
1030 ret_alloc *= 2;
1031 identifier_to_locale_free (ret);
1032 ret = NULL;
1033 continue;
1035 else
1037 conversion_ok = false;
1038 break;
1041 *outbuf = 0;
1042 break;
1044 iconv_close (cd);
1045 if (conversion_ok)
1046 return ret;
1049 #endif
1051 /* Otherwise, convert non-ASCII characters in IDENT to UCNs. */
1053 char *ret = (char *) identifier_to_locale_alloc (10 * idlen + 1);
1054 char *p = ret;
1055 for (i = 0; i < idlen;)
1057 unsigned int c;
1058 size_t utf8_len = decode_utf8_char (&uid[i], idlen - i, &c);
1059 if (utf8_len == 1)
1060 *p++ = uid[i];
1061 else
1063 sprintf (p, "\\U%08x", c);
1064 p += 10;
1066 i += utf8_len;
1068 *p = 0;
1069 return ret;