tagged release 0.6.4
[parrot.git] / src / spf_render.c
blob7a69b21f0042f57adc10803740422869bac870d6
1 /*
2 Copyright (C) 2001-2007, The Perl Foundation.
3 $Id$
5 =head1 NAME
7 src/spf_render.c - Parrot sprintf
9 =head1 DESCRIPTION
11 Implements the main function that drives the C<Parrot_sprintf> family
12 and its utility functions.
14 =head2 Utility Functions
16 =over 4
18 =cut
22 #define IN_SPF_SYSTEM
24 #include "parrot/parrot.h"
25 #include "parrot/string_funcs.h"
26 #include "spf_render.str"
/* Parser state while scanning one format field; the states are visited in
 * the order the corresponding parts may appear after the '%'. */
typedef enum {
    PHASE_FLAGS = 0,    /* reading flag characters */
    PHASE_WIDTH = 1,    /* reading the minimum field width */
    PHASE_PREC  = 2,    /* reading the precision */
    PHASE_TYPE  = 3,    /* reading the size modifier */
    PHASE_TERM  = 4,    /* reading the conversion character */
    PHASE_DONE  = 5     /* field finished */
} PHASE;
37 typedef struct SpfInfo_tag {
38 UINTVAL width;
39 UINTVAL prec;
40 INTVAL flags;
41 INTVAL type;
42 PHASE phase;
43 } SpfInfo;
/* Bit masks stored in SpfInfo.flags. */
enum {
    FLAG_MINUS = 0x01,  /* '-' : left-align */
    FLAG_PLUS  = 0x02,  /* '+' : always show a sign */
    FLAG_ZERO  = 0x04,  /* '0' : pad with zeros */
    FLAG_SPACE = 0x08,  /* ' ' : blank in place of a plus sign */
    FLAG_SHARP = 0x10,  /* '#' : alternate form (prefix) */
    FLAG_WIDTH = 0x20,  /* a field width was given */
    FLAG_PREC  = 0x40   /* a precision was given */
};
55 /* HEADERIZER HFILE: include/parrot/misc.h */
57 /* HEADERIZER BEGIN: static */
58 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
60 static void gen_sprintf_call(
61 ARGOUT(char *out),
62 ARGMOD(SpfInfo *info),
63 int thingy)
64 __attribute__nonnull__(1)
65 __attribute__nonnull__(2)
66 FUNC_MODIFIES(*out)
67 FUNC_MODIFIES(*info);
69 PARROT_CANNOT_RETURN_NULL
70 static STRING * handle_flags(PARROT_INTERP,
71 ARGIN(const SpfInfo *info),
72 ARGMOD(STRING *str),
73 INTVAL is_int_type,
74 ARGIN_NULLOK(STRING* prefix))
75 __attribute__nonnull__(1)
76 __attribute__nonnull__(2)
77 __attribute__nonnull__(3)
78 FUNC_MODIFIES(*str);
80 PARROT_CANNOT_RETURN_NULL
81 static STRING* str_append_w_flags(PARROT_INTERP,
82 ARGOUT(STRING *dest),
83 ARGIN(const SpfInfo *info),
84 ARGMOD(STRING *src),
85 ARGIN_NULLOK(STRING *prefix))
86 __attribute__nonnull__(1)
87 __attribute__nonnull__(2)
88 __attribute__nonnull__(3)
89 __attribute__nonnull__(4)
90 FUNC_MODIFIES(*dest)
91 FUNC_MODIFIES(*src);
93 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
94 /* HEADERIZER END: static */
/* Per Dan's orders, snprintf() is used wherever it is available; plain
 * sprintf() is only the fallback when snprintf isn't around for us.
 */
100 #ifdef _MSC_VER
101 # define snprintf _snprintf
102 #endif
106 =item C<static STRING * handle_flags>
108 Handles C<+>, C<->, C<0>, C<#>, space, width, and prec.
110 =cut
114 PARROT_CANNOT_RETURN_NULL
115 static STRING *
116 handle_flags(PARROT_INTERP, ARGIN(const SpfInfo *info), ARGMOD(STRING *str),
117 INTVAL is_int_type, ARGIN_NULLOK(STRING* prefix))
119 UINTVAL len = string_length(interp, str);
121 if (is_int_type) {
122 if (info->flags & FLAG_PREC && info->prec == 0 &&
123 len == 1 &&
124 string_ord(interp, str, 0) == '0') {
125 string_chopn_inplace(interp, str, len);
126 len = 0;
128 /* +, space */
129 if (!len || string_ord(interp, str, 0) != '-') {
130 if (info->flags & FLAG_PLUS) {
131 STRING * const cs = CONST_STRING(interp, "+");
132 str = string_concat(interp, cs, str, 0);
133 len++;
135 else if (info->flags & FLAG_SPACE) {
136 STRING * const cs = CONST_STRING(interp, " ");
137 str = string_concat(interp, cs, str, 0);
138 len++;
142 /* # 0x ... */
143 if ((info->flags & FLAG_SHARP) && prefix) {
144 str = string_concat(interp, prefix, str, 0);
145 len += string_length(interp, prefix);
147 /* XXX sharp + fill ??? */
149 #if 0
150 /* precision - only for floats, which is handled elsewhere */
151 if (info->flags & FLAG_PREC) {
152 info->flags |= FLAG_WIDTH;
153 if (string_ord(interp, str, 0) == '-' ||
154 string_ord(interp, str, 0) == '+') {
155 info->width = info->prec + 1;
157 else {
158 info->width = info->prec;
161 #endif
163 else {
164 /* string precision */
165 if (info->flags & FLAG_PREC && info->prec == 0) {
166 string_chopn_inplace(interp, str, len);
167 len = 0;
169 else
170 if (info->flags & FLAG_PREC && info->prec < len) {
171 string_chopn_inplace(interp, str, -(INTVAL)(info->prec));
172 len = info->prec;
176 if ((info->flags & FLAG_WIDTH) && info->width > len) {
177 STRING * const filler =
178 ((info->flags & FLAG_ZERO) && !(info->flags & FLAG_MINUS))
179 ? CONST_STRING(interp, "0")
180 : CONST_STRING(interp, " ");
181 STRING * const fill = string_repeat(interp, filler, info->width - len, NULL);
183 if (info->flags & FLAG_MINUS) { /* left-align */
184 str = string_concat(interp, str, fill, 0);
186 else { /* right-align */
187 /* signed and zero padded */
188 if (info->flags & FLAG_ZERO
189 && (string_ord(interp, str, 0) == '-' ||
190 string_ord(interp, str, 0) == '+')) {
191 STRING *temp = NULL;
192 STRING *ignored;
193 ignored = string_substr(interp, str, 1, len-1, &temp, 0);
194 UNUSED(ignored);
195 string_chopn_inplace(interp, str, -1);
196 str = string_append(interp, str, fill);
197 str = string_append(interp, str, temp);
199 else {
200 str = string_concat(interp, fill, str, 0);
204 return str;
209 =item C<static STRING* str_append_w_flags>
211 Used by Parrot_sprintf_format. Prepends supplied prefix for numeric
212 values. (e.g. 0x for hex.)
214 Returns the pointer to the modified string.
216 =cut
220 PARROT_CANNOT_RETURN_NULL
221 static STRING*
222 str_append_w_flags(PARROT_INTERP, ARGOUT(STRING *dest), ARGIN(const SpfInfo *info),
223 ARGMOD(STRING *src), ARGIN_NULLOK(STRING *prefix))
225 src = handle_flags(interp, info, src, 1, prefix);
226 dest = string_append(interp, dest, src);
227 return dest;
232 =item C<static void gen_sprintf_call>
234 Turn the info structure back into an sprintf format. Far from being
235 pointless, this is used to call C<snprintf()> when we're confronted with
236 a float.
238 =cut
242 static void
243 gen_sprintf_call(ARGOUT(char *out), ARGMOD(SpfInfo *info), int thingy)
245 int i = 0;
246 out[i++] = '%';
248 if (info->flags) {
249 if (info->flags & FLAG_MINUS)
250 out[i++] = '-';
252 if (info->flags & FLAG_PLUS)
253 out[i++] = '+';
255 if (info->flags & FLAG_ZERO)
256 out[i++] = '0';
258 if (info->flags & FLAG_SPACE)
259 out[i++] = ' ';
261 if (info->flags & FLAG_SHARP)
262 out[i++] = '#';
265 if (info->flags & FLAG_WIDTH) {
266 if (info->width > PARROT_SPRINTF_BUFFER_SIZE - 1)
267 info->width = PARROT_SPRINTF_BUFFER_SIZE;
269 i += sprintf(out + i, "%u", (unsigned)info->width);
272 if (info->flags & FLAG_PREC) {
273 if (info->prec > PARROT_SPRINTF_MAX_PREC)
274 info->prec = PARROT_SPRINTF_MAX_PREC;
276 out[i++] = '.';
277 i += sprintf(out + i, "%u", (unsigned)info->prec);
280 if (thingy == 'd' || thingy == 'i' ||thingy == 'u') {
281 /* the u?int isa HUGEU?INTVAL aka long long
282 * the 'll' modifier is specced in susv3 - hopefully all our
283 * compilers support it too */
284 out[i++] = 'l';
285 out[i++] = 'l';
288 out[i++] = (char)thingy;
289 out[i] = 0;
295 =item C<STRING * Parrot_sprintf_format>
297 This is the engine that does all the formatting.
299 =cut
/*
 * Parrot_sprintf_format(interp, pat, obj)
 *
 * Expands the format string pat into the string targ and returns it.
 * Arguments are pulled from obj through its getint, getuint, getfloat,
 * getptr and getstring callbacks.
 *
 * Literal runs of pat are copied with string_substr + string_append and
 * a doubled percent sign becomes part of the following literal run. Any
 * other percent sign starts a field, which is parsed phase by phase
 * (flags, width, precision, size, terminator) into a local SpfInfo.
 *
 * Terminators c, o, x, X, b, B and p are converted here and padded by
 * str_append_w_flags. Terminators d, i, u and the float ones are printed
 * by the C library using a format rebuilt by gen_sprintf_call. r and s
 * go through handle_flags as strings. Any other terminator raises
 * INVALID_CHARACTER through real_exception, unless the size was P or S,
 * in which case the field is handled like s.
 */
303 PARROT_WARN_UNUSED_RESULT
304 PARROT_CANNOT_RETURN_NULL
305 STRING *
306 Parrot_sprintf_format(PARROT_INTERP,
307 ARGIN(STRING *pat), ARGIN(SPRINTF_OBJ *obj))
309 INTVAL i;
310 INTVAL len = 0;
311 INTVAL old = 0;
312 INTVAL pat_len = (INTVAL)string_length(interp, pat);
/* i is the cursor into pat; old and len are the start and length of the
 * literal run that has not been copied to targ yet */
314 /* start with a buffer; double the pattern length to avoid realloc #1 */
315 STRING *targ = string_make_empty(interp, enum_stringrep_one, pat_len << 1);
317 /* ts is used almost universally as an intermediate target;
318 * tc is used as a temporary buffer by uint_to_string and
319 * as a target by gen_sprintf_call.
321 STRING *substr = NULL;
322 char tc[PARROT_SPRINTF_BUFFER_SIZE];
324 for (i = 0; i < pat_len; i++) {
325 if (string_ord(interp, pat, i) == '%') { /* % */
/* flush the literal run collected before this percent sign */
326 if (len) {
327 STRING *ignored
328 = string_substr(interp, pat, old, len, &substr, 1);
329 UNUSED(ignored);
330 /* XXX This shouldn't modify targ the pointer */
331 targ = string_append(interp, targ, substr);
333 len = 0;
334 old = i;
335 if (string_ord(interp, pat, i + 1) == '%') {
336 /* skip this one, make next the first char
337 * of literal sequence, starting at old */
338 i++;
339 old++;
340 len++;
341 continue;
343 else {
344 /* hoo boy, here we go... */
346 HUGEINTVAL sharedint = 0;
348 /* Storage for flags, etc. */
349 SpfInfo info = { 0, 0, 0, 0, (PHASE)0 };
351 /* Reset temporaries */
352 tc[0] = '\0';
354 /* This can be really hard to understand, so I'll try to explain beforehand.
355 * A rough grammar for a printf format is:
357 * grammar Parrot::PrintF_Format {
358 * rule format {
359 * <other_stuff> (<field> <other_stuff>)*
362 * rule other_stuff {
363 * [<[^\%]> | \%\%]*:
366 * rule field {
367 * \%
368 * <flags>?
369 * <width>?
370 * [\.<prec>]?
371 * <size>?
372 * <term>
375 * rule flags {
376 * <[
377 * + # prefix with a + if necessary
378 * - # left-align
379 * 0 # zero-pad
380 * <sp> # space-pad
381 * \# # 0, 0x on octal, hex; force decimal point on float
382 * ]>+
385 * rule width {
386 * [\d|\*]+ # minimum width
389 * rule prec {
390 * [\d|\*]+ # width on integers;
391 * # number of digits after decimal on floats;
392 * # maximum width on strings
395 * rule size {
396 * <[
397 * h # short (or float)
398 * l # long
399 * H # HUGEwhateverVAL (long [long]?, [long]? double)
400 * v # whateverVAL
401 * O # opcode_t
402 * P # really a PMC
403 * S # Parrot string (only with %s)
404 * ]>
407 * rule term {
408 * <[
409 * c # char
410 * d # integer
411 * i # integer
412 * o # octal
413 * x # hex
414 * X # hex with capital X (if #)
415 * b # binary
416 * B # binary with capital B (if #)
417 * u # unsigned integer
418 * p # pointer
420 * e # 1e1
421 * E # 1E1
422 * f # 1.0
423 * g # 1, 0.1, 1e1
424 * G # 1, 0.1, 1E1
426 * s # string
427 * ]>
431 * Complication: once upon a time, %P existed. Now you should
432 * use %Ps, %Pd or %Pf, but we still need to support the old form.
433 * The same is true of %S--%Ss is the best form, but %S is still
434 * supported.
436 * The implementation of Parrot_vsprintf is surprisingly similar to this
437 * regex, even though the two were developed semi-independently.
438 * Parrot_vsprintf keeps track of what it expects to see next (the
439 * 'phase')--flags, width, precision, size, or field type (term). If it
440 * doesn't find a character that fits whatever it's expecting, it sets
441 * info.phase to the next thing and tries it. The first four phases just
442 * set flags--the last does all the work.
445 for (i++; i < pat_len && info.phase != PHASE_DONE; i++) {
446 const INTVAL ch = string_ord(interp, pat, i);
448 switch (info.phase) {
449 /*@fallthrough@ */ case PHASE_FLAGS:
450 switch (ch) {
451 case '-':
452 info.flags |= FLAG_MINUS;
453 continue;
455 case '+':
456 info.flags |= FLAG_PLUS;
457 continue;
459 case '0':
460 info.flags |= FLAG_ZERO;
461 continue;
463 case ' ':
464 info.flags |= FLAG_SPACE;
465 continue;
467 case '#':
468 info.flags |= FLAG_SHARP;
469 continue;
471 default:
472 info.phase = PHASE_WIDTH;
476 /*@fallthrough@ */ case PHASE_WIDTH:
477 switch (ch) {
478 case '0':
479 case '1':
480 case '2':
481 case '3':
482 case '4':
483 case '5':
484 case '6':
485 case '7':
486 case '8':
487 case '9':
488 info.flags |= FLAG_WIDTH;
489 info.width *= 10;
490 info.width += ch - '0';
491 continue;
/* a star takes the width from the next argument instead of the pattern */
493 case '*':
494 info.flags |= FLAG_WIDTH;
495 info.width = (UINTVAL)obj->getint(interp,
496 SIZE_XVAL, obj);
497 /* fall through */
499 case '.':
500 info.phase = PHASE_PREC;
501 continue;
503 default:
504 info.phase = PHASE_PREC;
508 /*@fallthrough@ */ case PHASE_PREC:
509 switch (ch) {
510 case '0':
511 case '1':
512 case '2':
513 case '3':
514 case '4':
515 case '5':
516 case '6':
517 case '7':
518 case '8':
519 case '9':
520 info.flags |= FLAG_PREC;
521 info.prec *= 10;
522 info.prec += ch - '0';
523 continue;
/* a star takes the precision from the next argument */
525 case '*':
526 info.flags |= FLAG_PREC;
527 info.prec = (UINTVAL)obj->getint(interp,
528 SIZE_XVAL, obj);
529 info.phase = PHASE_TYPE;
530 continue;
532 default:
533 info.phase = PHASE_TYPE;
536 /*@fallthrough@ */ case PHASE_TYPE:
537 switch (ch) {
538 case 'h':
539 info.type = SIZE_SHORT;
540 continue;
542 case 'l':
543 info.type = SIZE_LONG;
544 continue;
546 case 'L':
547 case 'H':
548 info.type = SIZE_HUGE;
549 continue;
551 case 'v':
552 info.type = SIZE_XVAL;
553 continue;
555 case 'O':
556 info.type = SIZE_OPCODE;
557 continue;
559 case 'P':
560 info.type = SIZE_PMC;
561 continue;
563 case 'S':
564 info.type = SIZE_PSTR;
565 continue;
567 default:
568 info.phase = PHASE_TERM;
572 /*@fallthrough@ */ case PHASE_TERM:
573 switch (ch) {
574 /* INTEGERS */
575 case 'c':
577 STRING * const ts = string_chr(interp,
578 (UINTVAL)obj->getint(interp, info.type, obj));
579 targ = str_append_w_flags(interp, targ, &info, ts, NULL);
581 break;
583 case 'o':
585 const UHUGEINTVAL theuint =
586 obj->getuint(interp, info.type, obj);
587 STRING * const ts =
588 uint_to_str(interp, tc, theuint, 8, 0);
589 STRING * const prefix = CONST_STRING(interp, "0");
591 /* unsigned conversion - no plus */
592 info.flags &= ~FLAG_PLUS;
593 targ = str_append_w_flags(interp, targ,
594 &info, ts, prefix);
596 break;
598 case 'x':
600 const UHUGEINTVAL theuint =
601 obj->getuint(interp, info.type, obj);
602 STRING * const ts =
603 uint_to_str(interp, tc, theuint, 16, 0);
604 STRING * const prefix = CONST_STRING(interp, "0x");
606 /* unsigned conversion - no plus */
607 info.flags &= ~FLAG_PLUS;
608 targ = str_append_w_flags(interp, targ,
609 &info, ts, prefix);
611 break;
613 case 'X':
615 STRING * const prefix = CONST_STRING(interp, "0X");
616 const UHUGEINTVAL theuint =
617 obj->getuint(interp, info.type, obj);
618 STRING * const ts =
619 uint_to_str(interp, tc, theuint, 16, 0);
620 string_upcase_inplace(interp, ts);
622 /* unsigned conversion - no plus */
623 info.flags &= ~FLAG_PLUS;
624 targ = str_append_w_flags(interp, targ,
625 &info, ts, prefix);
627 break;
629 case 'b':
631 STRING * const prefix = CONST_STRING(interp, "0b");
632 const UHUGEINTVAL theuint =
633 obj->getuint(interp, info.type, obj);
634 STRING * const ts =
635 uint_to_str(interp, tc, theuint, 2, 0);
637 /* unsigned conversion - no plus */
638 info.flags &= ~FLAG_PLUS;
639 targ = str_append_w_flags(interp, targ,
640 &info, ts, prefix);
642 break;
/* NOTE(review): unlike b, this case reads the argument with getint and
 * converts it with int_to_str - confirm that is intended */
644 case 'B':
646 STRING * const prefix = CONST_STRING(interp, "0B");
647 const HUGEINTVAL theint =
648 obj->getint(interp, info.type, obj);
649 STRING * const ts =
650 int_to_str(interp, tc, theint, 2);
652 /* unsigned conversion - no plus */
653 info.flags &= ~FLAG_PLUS;
654 targ = str_append_w_flags(interp, targ,
655 &info, ts, prefix);
657 break;
/* u stores its argument in sharedint and jumps into the d/i code below */
659 case 'u':
661 const UHUGEINTVAL theuint =
662 obj->getuint(interp, info.type, obj);
663 sharedint = theuint;
665 goto do_sprintf;
666 case 'd':
667 case 'i':
669 /* EVIL: Work around bug in glibc that makes %0lld
670 * sometimes output an empty string. */
671 if (!(info.flags & FLAG_WIDTH))
672 info.flags &= ~FLAG_ZERO;
674 sharedint = obj->getint(interp, info.type, obj);
675 do_sprintf:
/* rebuild a C format in tc, then let the C library print sharedint
 * back into tc using that format */
677 STRING *ts;
678 gen_sprintf_call(tc, &info, ch);
679 ts = cstr2pstr(tc);
681 char * const tempstr =
682 string_to_cstring(interp, ts);
684 #ifdef PARROT_HAS_SNPRINTF
685 snprintf(tc, PARROT_SPRINTF_BUFFER_SIZE,
686 tempstr, sharedint);
687 #else
688 /* the buffer is 4096, so no problem here */
689 sprintf(tc, tempstr, sharedint);
690 #endif
691 string_cstring_free(tempstr);
693 targ = string_append(interp, targ, cstr2pstr(tc));
695 break;
697 case 'p':
699 STRING * const prefix = CONST_STRING(interp, "0x");
700 const void * const ptr =
701 obj->getptr(interp, info.type, obj);
702 STRING * const ts = uint_to_str(interp, tc,
703 (HUGEINTVAL) (size_t) ptr, 16, 0);
705 targ = str_append_w_flags(interp, targ, &info,
706 ts, prefix);
708 break;
710 /* FLOATS - We cheat on these and use snprintf. */
711 case 'e':
712 case 'E':
713 case 'f':
714 case 'g':
715 case 'G':
717 STRING *ts;
718 const HUGEFLOATVAL thefloat =
719 obj->getfloat(interp, info.type, obj);
721 /* turn -0.0 into 0.0 */
722 gen_sprintf_call(tc, &info, ch);
723 ts = cstr2pstr(tc);
725 /* XXX lost precision if %Hg or whatever */
727 char * const tempstr =
728 string_to_cstring(interp, ts);
730 #ifdef PARROT_HAS_SNPRINTF
731 snprintf(tc, PARROT_SPRINTF_BUFFER_SIZE,
732 tempstr,
733 (double)thefloat);
734 #else
735 /* the buffer is 4096, so no problem here */
736 sprintf(tc, tempstr, (double)thefloat);
737 #endif
738 string_cstring_free(tempstr);
741 #ifdef WIN32
743 /* Microsoft defaults to three digits for
744 * exponents, even when fewer digits would suffice.
745 * For the sake of portability, we will here
746 * attempt to hide that. */
747 if (ch == 'g' || ch == 'G'
748 || ch == 'e' || ch == 'E') {
749 const size_t tclen = strlen(tc);
750 size_t j;
751 for (j = 0; j < tclen; j++) {
752 if ((tc[j] == 'e' || tc[j] == 'E')
753 && (tc[j+1] == '+' || tc[j+1] == '-')
754 && tc[j+2] == '0'
755 && isdigit((unsigned char)tc[j+3])
756 && isdigit((unsigned char)tc[j+4]))
758 mem_sys_memmove(&tc[j+2], &tc[j+3],
759 strlen(&tc[j+2]));
761 /* now fix any broken length */
763 if ((info.flags & FLAG_WIDTH)
764 && strlen(tc) < info.width) {
765 if (info.flags & FLAG_MINUS)
766 strcat(tc, " ");
767 else {
768 mem_sys_memmove(&tc[1], &tc[0],
769 strlen(tc) + 1);
770 tc[0] = (info.flags & FLAG_ZERO) ? '0' : ' ';
774 /* only one fix required per string */
775 break;
779 #endif /* WIN32 */
781 targ = string_append(interp, targ, cstr2pstr(tc));
783 break;
785 /* STRINGS */
786 case 'r': /* Python repr */
787 /* XXX the right fix is to add a getrepr entry *
788 * to SPRINTF_OBJ, but for now, getstring_pmc *
789 * is inlined and modified to call get_repr */
790 if (obj->getstring == pmc_core.getstring) {
791 PMC * const tmp =
792 VTABLE_get_pmc_keyed_int(interp,
793 ((PMC *)obj->data),
794 (obj->index));
796 STRING *string = (VTABLE_get_repr(interp, tmp));
797 STRING *ts = handle_flags(interp, &info,
798 string, 0, NULL);
799 obj->index++;
801 targ = string_append(interp, targ, ts);
802 break;
/* NOTE(review): r appears to fall through to s when the test above is
 * false; the default case below also jumps here with goto - confirm */
805 case 's':
806 CASE_s:
808 STRING * const string = obj->getstring(interp,
809 info.type, obj);
810 STRING * const ts = handle_flags(interp, &info,
811 string, 0, NULL);
812 targ = string_append(interp, targ, ts);
814 break;
816 default:
817 /* fake the old %P and %S commands */
818 if (info.type == SIZE_PMC
819 || info.type == SIZE_PSTR) {
820 i--;
821 goto CASE_s;
822 /* case 's' will see the SIZE_PMC or SIZE_PSTR
823 * and assume it was %Ps (or %Ss). Genius,
824 * no? */
826 else {
827 real_exception(interp, NULL, INVALID_CHARACTER,
828 "'%c' is not a valid "
829 "sprintf format", ch);
/* a terminator has been handled; PHASE_DONE ends the field-parsing loop */
833 info.phase = PHASE_DONE;
834 break;
836 case PHASE_DONE:
837 default:
838 /* This is the terminating condition of the surrounding
839 * loop, so...
841 PANIC(interp, "We can't be here");
846 old = i;
847 i--;
849 else {
850 len++;
853 if (len) {
854 STRING *ignored = string_substr(interp, pat, old, len, &substr, 1);
855 UNUSED(ignored);
856 targ = string_append(interp, targ, substr);
859 return targ;
864 =back
866 =head1 SEE ALSO
868 F<src/misc.h>, F<src/misc.c>, F<src/spf_vtable.c>.
870 =cut
876 * Local variables:
877 * c-file-style: "parrot"
878 * End:
879 * vim: expandtab shiftwidth=4: