tagged release 0.7.1
[parrot.git] / src / spf_render.c
blobf57f6ed43c40996edd0104f263af0f630dc045b5
/*
Copyright (C) 2001-2008, The Perl Foundation.
$Id$

=head1 NAME

src/spf_render.c - Parrot sprintf

=head1 DESCRIPTION

Implements the main function that drives the C<Parrot_sprintf> family
and its utility functions.

=head2 Utility Functions

=over 4

=cut

*/
22 #define IN_SPF_SYSTEM
24 #include "parrot/parrot.h"
25 #include "parrot/string_funcs.h"
26 #include "spf_render.str"
/* Parsing state for one '%' field of a format string.  The parser in
 * Parrot_sprintf_format starts at PHASE_FLAGS and only ever moves forward;
 * PHASE_DONE ends the per-field loop. */
typedef enum {
    PHASE_FLAGS = 0,    /* expecting flag characters: - + 0 space # */
    PHASE_WIDTH,        /* expecting width digits, '*' or '.' */
    PHASE_PREC,         /* expecting precision digits or '*' */
    PHASE_TYPE,         /* expecting a size modifier: h l L H v O P S */
    PHASE_TERM,         /* expecting the conversion character */
    PHASE_DONE          /* field fully consumed */
} PHASE;
37 typedef struct SpfInfo_tag {
38 UINTVAL width;
39 UINTVAL prec;
40 INTVAL flags;
41 INTVAL type;
42 PHASE phase;
43 } SpfInfo;
/* Bits stored in SpfInfo.flags.  The first five mirror the flag characters
 * of a format field; FLAG_WIDTH and FLAG_PREC record that an explicit
 * width or precision was supplied. */
enum {
    FLAG_MINUS  = (1<<0),   /* '-' */
    FLAG_PLUS   = (1<<1),   /* '+' */
    FLAG_ZERO   = (1<<2),   /* '0' */
    FLAG_SPACE  = (1<<3),   /* ' ' */
    FLAG_SHARP  = (1<<4),   /* '#' */
    FLAG_WIDTH  = (1<<5),   /* width was given */
    FLAG_PREC   = (1<<6)    /* precision was given */
};
55 /* HEADERIZER HFILE: include/parrot/misc.h */
57 /* HEADERIZER BEGIN: static */
58 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
60 static void gen_sprintf_call(
61 ARGOUT(char *out),
62 ARGMOD(SpfInfo *info),
63 int thingy)
64 __attribute__nonnull__(1)
65 __attribute__nonnull__(2)
66 FUNC_MODIFIES(*out)
67 FUNC_MODIFIES(*info);
69 PARROT_CANNOT_RETURN_NULL
70 static STRING * handle_flags(PARROT_INTERP,
71 ARGIN(const SpfInfo *info),
72 ARGMOD(STRING *str),
73 INTVAL is_int_type,
74 ARGIN_NULLOK(STRING* prefix))
75 __attribute__nonnull__(1)
76 __attribute__nonnull__(2)
77 __attribute__nonnull__(3)
78 FUNC_MODIFIES(*str);
80 PARROT_CANNOT_RETURN_NULL
81 static STRING* str_append_w_flags(PARROT_INTERP,
82 ARGOUT(STRING *dest),
83 ARGIN(const SpfInfo *info),
84 ARGMOD(STRING *src),
85 ARGIN_NULLOK(STRING *prefix))
86 __attribute__nonnull__(1)
87 __attribute__nonnull__(2)
88 __attribute__nonnull__(3)
89 __attribute__nonnull__(4)
90 FUNC_MODIFIES(*dest)
91 FUNC_MODIFIES(*src);
93 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
94 /* HEADERIZER END: static */
/* Per Dan's orders, we will not use sprintf if snprintf isn't
 * around for us.
 */
100 #ifdef _MSC_VER
101 # define snprintf _snprintf
102 #endif
106 =item C<static STRING * handle_flags>
108 Handles C<+>, C<->, C<0>, C<#>, space, width, and prec.
110 =cut
114 PARROT_CANNOT_RETURN_NULL
115 static STRING *
116 handle_flags(PARROT_INTERP, ARGIN(const SpfInfo *info), ARGMOD(STRING *str),
117 INTVAL is_int_type, ARGIN_NULLOK(STRING* prefix))
119 UINTVAL len = string_length(interp, str);
121 if (is_int_type) {
122 if (info->flags & FLAG_PREC && info->prec == 0 &&
123 len == 1 &&
124 string_ord(interp, str, 0) == '0') {
125 string_chopn_inplace(interp, str, len);
126 len = 0;
128 /* +, space */
129 if (!len || string_ord(interp, str, 0) != '-') {
130 if (info->flags & FLAG_PLUS) {
131 STRING * const cs = CONST_STRING(interp, "+");
132 str = string_concat(interp, cs, str, 0);
133 len++;
135 else if (info->flags & FLAG_SPACE) {
136 STRING * const cs = CONST_STRING(interp, " ");
137 str = string_concat(interp, cs, str, 0);
138 len++;
142 /* # 0x ... */
143 if ((info->flags & FLAG_SHARP) && prefix) {
144 str = string_concat(interp, prefix, str, 0);
145 len += string_length(interp, prefix);
147 /* XXX sharp + fill ??? */
149 #if 0
150 /* precision - only for floats, which is handled elsewhere */
151 if (info->flags & FLAG_PREC) {
152 info->flags |= FLAG_WIDTH;
153 if (string_ord(interp, str, 0) == '-' ||
154 string_ord(interp, str, 0) == '+') {
155 info->width = info->prec + 1;
157 else {
158 info->width = info->prec;
161 #endif
163 else {
164 /* string precision */
165 if (info->flags & FLAG_PREC && info->prec == 0) {
166 string_chopn_inplace(interp, str, len);
167 len = 0;
169 else
170 if (info->flags & FLAG_PREC && info->prec < len) {
171 string_chopn_inplace(interp, str, -(INTVAL)(info->prec));
172 len = info->prec;
176 if ((info->flags & FLAG_WIDTH) && info->width > len) {
177 STRING * const filler =
178 ((info->flags & FLAG_ZERO) && !(info->flags & FLAG_MINUS))
179 ? CONST_STRING(interp, "0")
180 : CONST_STRING(interp, " ");
181 STRING * const fill = string_repeat(interp, filler, info->width - len, NULL);
183 if (info->flags & FLAG_MINUS) { /* left-align */
184 str = string_concat(interp, str, fill, 0);
186 else { /* right-align */
187 /* signed and zero padded */
188 if (info->flags & FLAG_ZERO
189 && (string_ord(interp, str, 0) == '-' ||
190 string_ord(interp, str, 0) == '+')) {
191 STRING *temp = NULL;
192 STRING *ignored;
193 ignored = string_substr(interp, str, 1, len-1, &temp, 0);
194 UNUSED(ignored);
195 string_chopn_inplace(interp, str, -1);
196 str = string_append(interp, str, fill);
197 str = string_append(interp, str, temp);
199 else {
200 str = string_concat(interp, fill, str, 0);
204 return str;
209 =item C<static STRING* str_append_w_flags>
211 Used by Parrot_sprintf_format. Prepends supplied prefix for numeric
212 values. (e.g. 0x for hex.)
214 Returns the pointer to the modified string.
216 =cut
220 PARROT_CANNOT_RETURN_NULL
221 static STRING*
222 str_append_w_flags(PARROT_INTERP, ARGOUT(STRING *dest), ARGIN(const SpfInfo *info),
223 ARGMOD(STRING *src), ARGIN_NULLOK(STRING *prefix))
225 src = handle_flags(interp, info, src, 1, prefix);
226 dest = string_append(interp, dest, src);
227 return dest;
232 =item C<static void gen_sprintf_call>
234 Turn the info structure back into an sprintf format. Far from being
235 pointless, this is used to call C<snprintf()> when we're confronted with
236 a float.
238 =cut
242 static void
243 gen_sprintf_call(ARGOUT(char *out), ARGMOD(SpfInfo *info), int thingy)
245 int i = 0;
246 out[i++] = '%';
248 if (info->flags) {
249 if (info->flags & FLAG_MINUS)
250 out[i++] = '-';
252 if (info->flags & FLAG_PLUS)
253 out[i++] = '+';
255 if (info->flags & FLAG_ZERO)
256 out[i++] = '0';
258 if (info->flags & FLAG_SPACE)
259 out[i++] = ' ';
261 if (info->flags & FLAG_SHARP)
262 out[i++] = '#';
265 if (info->flags & FLAG_WIDTH) {
266 if (info->width > PARROT_SPRINTF_BUFFER_SIZE - 1)
267 info->width = PARROT_SPRINTF_BUFFER_SIZE;
269 i += sprintf(out + i, "%u", (unsigned)info->width);
272 if (info->flags & FLAG_PREC) {
273 if (info->prec > PARROT_SPRINTF_MAX_PREC)
274 info->prec = PARROT_SPRINTF_MAX_PREC;
276 out[i++] = '.';
277 i += sprintf(out + i, "%u", (unsigned)info->prec);
280 if (thingy == 'd' || thingy == 'i' ||thingy == 'u') {
281 /* the u?int isa HUGEU?INTVAL aka long long
282 * the 'll' modifier is specced in susv3 - hopefully all our
283 * compilers support it too */
284 out[i++] = 'l';
285 out[i++] = 'l';
288 out[i++] = (char)thingy;
289 out[i] = 0;
295 =item C<STRING * Parrot_sprintf_format>
297 This is the engine that does all the formatting.
299 =cut
303 PARROT_WARN_UNUSED_RESULT
304 PARROT_CANNOT_RETURN_NULL
305 STRING *
306 Parrot_sprintf_format(PARROT_INTERP,
307 ARGIN(STRING *pat), ARGIN(SPRINTF_OBJ *obj))
309 INTVAL i;
310 INTVAL len = 0;
311 INTVAL old = 0;
312 INTVAL pat_len = (INTVAL)string_length(interp, pat);
313 HUGEINTVAL num;
315 /* start with a buffer; double the pattern length to avoid realloc #1 */
316 STRING *targ = string_make_empty(interp, enum_stringrep_one, pat_len << 1);
318 /* ts is used almost universally as an intermediate target;
319 * tc is used as a temporary buffer by uint_to_string and
320 * as a target by gen_sprintf_call.
322 STRING *substr = NULL;
323 char tc[PARROT_SPRINTF_BUFFER_SIZE];
325 for (i = 0; i < pat_len; i++) {
326 if (string_ord(interp, pat, i) == '%') { /* % */
327 if (len) {
328 STRING *ignored
329 = string_substr(interp, pat, old, len, &substr, 1);
330 UNUSED(ignored);
331 /* XXX This shouldn't modify targ the pointer */
332 targ = string_append(interp, targ, substr);
334 len = 0;
335 old = i;
336 if (string_ord(interp, pat, i + 1) == '%') {
337 /* skip this one, make next the first char
338 * of literal sequence, starting at old */
339 i++;
340 old++;
341 len++;
342 continue;
344 else {
345 /* hoo boy, here we go... */
347 HUGEINTVAL sharedint = 0;
349 /* Storage for flags, etc. */
350 SpfInfo info = { 0, 0, 0, 0, (PHASE)0 };
352 /* Reset temporaries */
353 tc[0] = '\0';
355 /* This can be really hard to understand, so I'll try to explain beforehand.
356 * A rough grammar for a printf format is:
358 * grammar Parrot::PrintF_Format {
359 * rule format {
360 * <other_stuff> (<field> <other_stuff>)*
363 * rule other_stuff {
364 * [<[^\%]> | \%\%]*:
367 * rule field {
368 * \%
369 * <flags>?
370 * <width>?
371 * [\.<prec>]?
372 * <size>?
373 * <term>
376 * rule flags {
377 * <[
378 * + # prefix with a + if necessary
379 * - # left-align
380 * 0 # zero-pad
381 * <sp> # space-pad
382 * \# # 0, 0x on octal, hex; force decimal point on float
383 * ]>+
386 * rule width {
387 * [\d|\*]+ # minimum width
390 * rule prec {
391 * [\d|\*]+ # width on integers;
392 * # number of digits after decimal on floats;
393 * # maximum width on strings
396 * rule size {
397 * <[
398 * h # short (or float)
399 * l # long
400 * H # HUGEwhateverVAL (long [long]?, [long]? double)
401 * v # whateverVAL
402 * O # opcode_t
403 * P # really a PMC
404 * S # Parrot string (only with %s)
405 * ]>
408 * rule term {
409 * <[
410 * c # char
411 * d # integer
412 * i # integer
413 * o # octal
414 * x # hex
415 * X # hex with capital X (if #)
416 * b # binary
417 * B # binary with capital B (if #)
418 * u # unsigned integer
419 * p # pointer
421 * e # 1e1
422 * E # 1E1
423 * f # 1.0
424 * g # 1, 0.1, 1e1
425 * G # 1, 0.1, 1E1
427 * s # string
428 * ]>
432 * Complication: once upon a time, %P existed. Now you should
433 * use %Ps, %Pd or %Pf, but we still need to support the old form.
434 * The same is true of %S--%Ss is the best form, but %S is still
435 * supported.
437 * The implementation of Parrot_vsprintf is surprisingly similar to this
438 * regex, even though the two were developed semi-independently.
439 * Parrot_vsprintf keeps track of what it expects to see next (the
440 * 'phase')--flags, width, precision, size, or field type (term). If it
441 * doesn't find a character that fits whatever it's expecting, it sets
442 * info.phase to the next thing and tries it. The first four phases just
443 * set flags--the last does all the work.
446 for (i++; i < pat_len && info.phase != PHASE_DONE; i++) {
447 const INTVAL ch = string_ord(interp, pat, i);
449 switch (info.phase) {
450 /*@fallthrough@ */ case PHASE_FLAGS:
451 switch (ch) {
452 case '-':
453 info.flags |= FLAG_MINUS;
454 continue;
456 case '+':
457 info.flags |= FLAG_PLUS;
458 continue;
460 case '0':
461 info.flags |= FLAG_ZERO;
462 continue;
464 case ' ':
465 info.flags |= FLAG_SPACE;
466 continue;
468 case '#':
469 info.flags |= FLAG_SHARP;
470 continue;
472 default:
473 info.phase = PHASE_WIDTH;
477 /*@fallthrough@ */ case PHASE_WIDTH:
478 switch (ch) {
479 case '0':
480 case '1':
481 case '2':
482 case '3':
483 case '4':
484 case '5':
485 case '6':
486 case '7':
487 case '8':
488 case '9':
489 info.flags |= FLAG_WIDTH;
490 info.width *= 10;
491 info.width += ch - '0';
492 continue;
494 case '*':
495 info.flags |= FLAG_WIDTH;
496 num = obj->getint(interp, SIZE_XVAL, obj);
497 if (num < 0) {
498 info.flags |= FLAG_MINUS;
499 info.width = -num;
501 else {
502 info.width = num;
504 continue;
506 case '.':
507 info.phase = PHASE_PREC;
508 continue;
510 default:
511 info.phase = PHASE_PREC;
515 /*@fallthrough@ */ case PHASE_PREC:
516 switch (ch) {
517 case '0':
518 case '1':
519 case '2':
520 case '3':
521 case '4':
522 case '5':
523 case '6':
524 case '7':
525 case '8':
526 case '9':
527 info.flags |= FLAG_PREC;
528 info.prec *= 10;
529 info.prec += ch - '0';
530 continue;
532 case '*':
533 info.flags |= FLAG_PREC;
534 info.prec = (UINTVAL)obj->getint(interp,
535 SIZE_XVAL, obj);
536 info.phase = PHASE_TYPE;
537 continue;
539 default:
540 info.phase = PHASE_TYPE;
543 /*@fallthrough@ */ case PHASE_TYPE:
544 switch (ch) {
545 case 'h':
546 info.type = SIZE_SHORT;
547 continue;
549 case 'l':
550 info.type = SIZE_LONG;
551 continue;
553 case 'L':
554 case 'H':
555 info.type = SIZE_HUGE;
556 continue;
558 case 'v':
559 info.type = SIZE_XVAL;
560 continue;
562 case 'O':
563 info.type = SIZE_OPCODE;
564 continue;
566 case 'P':
567 info.type = SIZE_PMC;
568 continue;
570 case 'S':
571 info.type = SIZE_PSTR;
572 continue;
574 default:
575 info.phase = PHASE_TERM;
579 /*@fallthrough@ */ case PHASE_TERM:
580 switch (ch) {
581 /* INTEGERS */
582 case 'c':
584 STRING * const ts = string_chr(interp,
585 (UINTVAL)obj->getint(interp, info.type, obj));
586 targ = str_append_w_flags(interp, targ, &info, ts, NULL);
588 break;
590 case 'o':
592 const UHUGEINTVAL theuint =
593 obj->getuint(interp, info.type, obj);
594 STRING * const ts =
595 uint_to_str(interp, tc, theuint, 8, 0);
596 STRING * const prefix = CONST_STRING(interp, "0");
598 /* unsigned conversion - no plus */
599 info.flags &= ~FLAG_PLUS;
600 targ = str_append_w_flags(interp, targ,
601 &info, ts, prefix);
603 break;
605 case 'x':
607 const UHUGEINTVAL theuint =
608 obj->getuint(interp, info.type, obj);
609 STRING * const ts =
610 uint_to_str(interp, tc, theuint, 16, 0);
611 STRING * const prefix = CONST_STRING(interp, "0x");
613 /* unsigned conversion - no plus */
614 info.flags &= ~FLAG_PLUS;
615 targ = str_append_w_flags(interp, targ,
616 &info, ts, prefix);
618 break;
620 case 'X':
622 STRING * const prefix = CONST_STRING(interp, "0X");
623 const UHUGEINTVAL theuint =
624 obj->getuint(interp, info.type, obj);
625 STRING * const ts =
626 uint_to_str(interp, tc, theuint, 16, 0);
627 string_upcase_inplace(interp, ts);
629 /* unsigned conversion - no plus */
630 info.flags &= ~FLAG_PLUS;
631 targ = str_append_w_flags(interp, targ,
632 &info, ts, prefix);
634 break;
636 case 'b':
638 STRING * const prefix = CONST_STRING(interp, "0b");
639 const UHUGEINTVAL theuint =
640 obj->getuint(interp, info.type, obj);
641 STRING * const ts =
642 uint_to_str(interp, tc, theuint, 2, 0);
644 /* unsigned conversion - no plus */
645 info.flags &= ~FLAG_PLUS;
646 targ = str_append_w_flags(interp, targ,
647 &info, ts, prefix);
649 break;
651 case 'B':
653 STRING * const prefix = CONST_STRING(interp, "0B");
654 const HUGEINTVAL theint =
655 obj->getint(interp, info.type, obj);
656 STRING * const ts =
657 int_to_str(interp, tc, theint, 2);
659 /* unsigned conversion - no plus */
660 info.flags &= ~FLAG_PLUS;
661 targ = str_append_w_flags(interp, targ,
662 &info, ts, prefix);
664 break;
666 case 'u':
668 const UHUGEINTVAL theuint =
669 obj->getuint(interp, info.type, obj);
670 sharedint = theuint;
672 goto do_sprintf;
673 case 'd':
674 case 'i':
676 /* EVIL: Work around bug in glibc that makes %0lld
677 * sometimes output an empty string. */
678 if (!(info.flags & FLAG_WIDTH))
679 info.flags &= ~FLAG_ZERO;
681 sharedint = obj->getint(interp, info.type, obj);
682 do_sprintf:
684 STRING *ts;
685 gen_sprintf_call(tc, &info, ch);
686 ts = cstr2pstr(tc);
688 char * const tempstr =
689 string_to_cstring(interp, ts);
691 #ifdef PARROT_HAS_SNPRINTF
692 snprintf(tc, PARROT_SPRINTF_BUFFER_SIZE,
693 tempstr, sharedint);
694 #else
695 /* the buffer is 4096, so no problem here */
696 sprintf(tc, tempstr, sharedint);
697 #endif
698 string_cstring_free(tempstr);
700 targ = string_append(interp, targ, cstr2pstr(tc));
702 break;
704 case 'p':
706 STRING * const prefix = CONST_STRING(interp, "0x");
707 const void * const ptr =
708 obj->getptr(interp, info.type, obj);
709 STRING * const ts = uint_to_str(interp, tc,
710 (HUGEINTVAL) (size_t) ptr, 16, 0);
712 targ = str_append_w_flags(interp, targ, &info,
713 ts, prefix);
715 break;
717 /* FLOATS - We cheat on these and use snprintf. */
718 case 'e':
719 case 'E':
720 case 'f':
721 case 'g':
722 case 'G':
724 STRING *ts;
725 const HUGEFLOATVAL thefloat =
726 obj->getfloat(interp, info.type, obj);
728 /* turn -0.0 into 0.0 */
729 gen_sprintf_call(tc, &info, ch);
730 ts = cstr2pstr(tc);
732 /* XXX lost precision if %Hg or whatever */
734 char * const tempstr =
735 string_to_cstring(interp, ts);
737 #ifdef PARROT_HAS_SNPRINTF
738 snprintf(tc, PARROT_SPRINTF_BUFFER_SIZE,
739 tempstr,
740 (double)thefloat);
741 #else
742 /* the buffer is 4096, so no problem here */
743 sprintf(tc, tempstr, (double)thefloat);
744 #endif
745 string_cstring_free(tempstr);
748 #ifdef WIN32
750 /* Microsoft defaults to three digits for
751 * exponents, even when fewer digits would suffice.
752 * For the sake of portability, we will here
753 * attempt to hide that. */
754 if (ch == 'g' || ch == 'G'
755 || ch == 'e' || ch == 'E') {
756 const size_t tclen = strlen(tc);
757 size_t j;
758 for (j = 0; j < tclen; j++) {
759 if ((tc[j] == 'e' || tc[j] == 'E')
760 && (tc[j+1] == '+' || tc[j+1] == '-')
761 && tc[j+2] == '0'
762 && isdigit((unsigned char)tc[j+3])
763 && isdigit((unsigned char)tc[j+4]))
765 mem_sys_memmove(&tc[j+2], &tc[j+3],
766 strlen(&tc[j+2]));
768 /* now fix any broken length */
770 if ((info.flags & FLAG_WIDTH)
771 && strlen(tc) < info.width) {
772 if (info.flags & FLAG_MINUS)
773 strcat(tc, " ");
774 else {
775 mem_sys_memmove(&tc[1], &tc[0],
776 strlen(tc) + 1);
777 tc[0] = (info.flags & FLAG_ZERO) ? '0' : ' ';
781 /* only one fix required per string */
782 break;
786 #endif /* WIN32 */
788 targ = string_append(interp, targ, cstr2pstr(tc));
790 break;
792 /* STRINGS */
793 case 'r': /* Python repr */
794 /* XXX the right fix is to add a getrepr entry *
795 * to SPRINTF_OBJ, but for now, getstring_pmc *
796 * is inlined and modified to call get_repr */
797 if (obj->getstring == pmc_core.getstring) {
798 PMC * const tmp =
799 VTABLE_get_pmc_keyed_int(interp,
800 ((PMC *)obj->data),
801 (obj->index));
803 STRING *string = (VTABLE_get_repr(interp, tmp));
804 STRING *ts = handle_flags(interp, &info,
805 string, 0, NULL);
806 obj->index++;
808 targ = string_append(interp, targ, ts);
809 break;
812 case 's':
813 CASE_s:
815 STRING * const string = obj->getstring(interp,
816 info.type, obj);
817 STRING * const ts = handle_flags(interp, &info,
818 string, 0, NULL);
819 targ = string_append(interp, targ, ts);
821 break;
823 default:
824 /* fake the old %P and %S commands */
825 if (info.type == SIZE_PMC
826 || info.type == SIZE_PSTR) {
827 i--;
828 goto CASE_s;
829 /* case 's' will see the SIZE_PMC or SIZE_PSTR
830 * and assume it was %Ps (or %Ss). Genius,
831 * no? */
833 else {
834 Parrot_ex_throw_from_c_args(interp, NULL,
835 EXCEPTION_INVALID_CHARACTER,
836 "'%c' is not a valid sprintf format", ch);
840 info.phase = PHASE_DONE;
841 break;
843 case PHASE_DONE:
844 default:
845 /* This is the terminating condition of the surrounding
846 * loop, so...
848 PANIC(interp, "We can't be here");
853 old = i;
854 i--;
856 else {
857 len++;
860 if (len) {
861 STRING *ignored = string_substr(interp, pat, old, len, &substr, 1);
862 UNUSED(ignored);
863 targ = string_append(interp, targ, substr);
866 return targ;
/*

=back

=head1 SEE ALSO

F<src/misc.h>, F<src/misc.c>, F<src/spf_vtable.c>.

=cut

*/


/*
 * Local variables:
 *   c-file-style: "parrot"
 * End:
 * vim: expandtab shiftwidth=4:
 */