2 Copyright (C) 2001-2008, The Perl Foundation.
7 src/spf_render.c - Parrot sprintf
11 Implements the main function that drives the C<Parrot_sprintf> family
12 and its utility functions.
14 =head2 Utility Functions
24 #include "parrot/parrot.h"
25 #include "parrot/string_funcs.h"
26 #include "spf_render.str"
37 typedef struct SpfInfo_tag
{
55 /* HEADERIZER HFILE: include/parrot/misc.h */
57 /* HEADERIZER BEGIN: static */
58 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
60 static void gen_sprintf_call(
62 ARGMOD(SpfInfo
*info
),
64 __attribute__nonnull__(1)
65 __attribute__nonnull__(2)
69 PARROT_CANNOT_RETURN_NULL
70 static STRING
* handle_flags(PARROT_INTERP
,
71 ARGIN(const SpfInfo
*info
),
74 ARGIN_NULLOK(STRING
* prefix
))
75 __attribute__nonnull__(1)
76 __attribute__nonnull__(2)
77 __attribute__nonnull__(3)
80 PARROT_CANNOT_RETURN_NULL
81 static STRING
* str_append_w_flags(PARROT_INTERP
,
83 ARGIN(const SpfInfo
*info
),
85 ARGIN_NULLOK(STRING
*prefix
))
86 __attribute__nonnull__(1)
87 __attribute__nonnull__(2)
88 __attribute__nonnull__(3)
89 __attribute__nonnull__(4)
93 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
94 /* HEADERIZER END: static */
97 /* Per Dan's orders, we will not use sprintf if snprintf isn't
101 # define snprintf _snprintf
106 =item C<static STRING * handle_flags>
108 Handles C<+>, C<->, C<0>, C<#>, space, width, and prec.
114 PARROT_CANNOT_RETURN_NULL
116 handle_flags(PARROT_INTERP
, ARGIN(const SpfInfo
*info
), ARGMOD(STRING
*str
),
117 INTVAL is_int_type
, ARGIN_NULLOK(STRING
* prefix
))
119 UINTVAL len
= string_length(interp
, str
);
122 if (info
->flags
& FLAG_PREC
&& info
->prec
== 0 &&
124 string_ord(interp
, str
, 0) == '0') {
125 string_chopn_inplace(interp
, str
, len
);
129 if (!len
|| string_ord(interp
, str
, 0) != '-') {
130 if (info
->flags
& FLAG_PLUS
) {
131 STRING
* const cs
= CONST_STRING(interp
, "+");
132 str
= string_concat(interp
, cs
, str
, 0);
135 else if (info
->flags
& FLAG_SPACE
) {
136 STRING
* const cs
= CONST_STRING(interp
, " ");
137 str
= string_concat(interp
, cs
, str
, 0);
143 if ((info
->flags
& FLAG_SHARP
) && prefix
) {
144 str
= string_concat(interp
, prefix
, str
, 0);
145 len
+= string_length(interp
, prefix
);
147 /* XXX sharp + fill ??? */
150 /* precision - only for floats, which is handled elsewhere */
151 if (info
->flags
& FLAG_PREC
) {
152 info
->flags
|= FLAG_WIDTH
;
153 if (string_ord(interp
, str
, 0) == '-' ||
154 string_ord(interp
, str
, 0) == '+') {
155 info
->width
= info
->prec
+ 1;
158 info
->width
= info
->prec
;
164 /* string precision */
165 if (info
->flags
& FLAG_PREC
&& info
->prec
== 0) {
166 string_chopn_inplace(interp
, str
, len
);
170 if (info
->flags
& FLAG_PREC
&& info
->prec
< len
) {
171 string_chopn_inplace(interp
, str
, -(INTVAL
)(info
->prec
));
176 if ((info
->flags
& FLAG_WIDTH
) && info
->width
> len
) {
177 STRING
* const filler
=
178 ((info
->flags
& FLAG_ZERO
) && !(info
->flags
& FLAG_MINUS
))
179 ? CONST_STRING(interp
, "0")
180 : CONST_STRING(interp
, " ");
181 STRING
* const fill
= string_repeat(interp
, filler
, info
->width
- len
, NULL
);
183 if (info
->flags
& FLAG_MINUS
) { /* left-align */
184 str
= string_concat(interp
, str
, fill
, 0);
186 else { /* right-align */
187 /* signed and zero padded */
188 if (info
->flags
& FLAG_ZERO
189 && (string_ord(interp
, str
, 0) == '-' ||
190 string_ord(interp
, str
, 0) == '+')) {
193 ignored
= string_substr(interp
, str
, 1, len
-1, &temp
, 0);
195 string_chopn_inplace(interp
, str
, -1);
196 str
= string_append(interp
, str
, fill
);
197 str
= string_append(interp
, str
, temp
);
200 str
= string_concat(interp
, fill
, str
, 0);
209 =item C<static STRING* str_append_w_flags>
211 Used by Parrot_sprintf_format. Prepends supplied prefix for numeric
212 values. (e.g. 0x for hex.)
214 Returns the pointer to the modified string.
220 PARROT_CANNOT_RETURN_NULL
222 str_append_w_flags(PARROT_INTERP
, ARGOUT(STRING
*dest
), ARGIN(const SpfInfo
*info
),
223 ARGMOD(STRING
*src
), ARGIN_NULLOK(STRING
*prefix
))
225 src
= handle_flags(interp
, info
, src
, 1, prefix
);
226 dest
= string_append(interp
, dest
, src
);
232 =item C<static void gen_sprintf_call>
234 Turn the info structure back into an sprintf format. Far from being
235 pointless, this is used to call C<snprintf()> when we're confronted with
243 gen_sprintf_call(ARGOUT(char *out
), ARGMOD(SpfInfo
*info
), int thingy
)
249 if (info
->flags
& FLAG_MINUS
)
252 if (info
->flags
& FLAG_PLUS
)
255 if (info
->flags
& FLAG_ZERO
)
258 if (info
->flags
& FLAG_SPACE
)
261 if (info
->flags
& FLAG_SHARP
)
265 if (info
->flags
& FLAG_WIDTH
) {
266 if (info
->width
> PARROT_SPRINTF_BUFFER_SIZE
- 1)
267 info
->width
= PARROT_SPRINTF_BUFFER_SIZE
;
269 i
+= sprintf(out
+ i
, "%u", (unsigned)info
->width
);
272 if (info
->flags
& FLAG_PREC
) {
273 if (info
->prec
> PARROT_SPRINTF_MAX_PREC
)
274 info
->prec
= PARROT_SPRINTF_MAX_PREC
;
277 i
+= sprintf(out
+ i
, "%u", (unsigned)info
->prec
);
280 if (thingy
== 'd' || thingy
== 'i' ||thingy
== 'u') {
281 /* the u?int isa HUGEU?INTVAL aka long long
282 * the 'll' modifier is specced in susv3 - hopefully all our
283 * compilers support it too */
288 out
[i
++] = (char)thingy
;
295 =item C<STRING * Parrot_sprintf_format>
297 This is the engine that does all the formatting.
303 PARROT_WARN_UNUSED_RESULT
304 PARROT_CANNOT_RETURN_NULL
306 Parrot_sprintf_format(PARROT_INTERP
,
307 ARGIN(STRING
*pat
), ARGIN(SPRINTF_OBJ
*obj
))
312 INTVAL pat_len
= (INTVAL
)string_length(interp
, pat
);
315 /* start with a buffer; double the pattern length to avoid realloc #1 */
316 STRING
*targ
= string_make_empty(interp
, enum_stringrep_one
, pat_len
<< 1);
318 /* ts is used almost universally as an intermediate target;
319 * tc is used as a temporary buffer by uint_to_string and
320 * as a target by gen_sprintf_call.
322 STRING
*substr
= NULL
;
323 char tc
[PARROT_SPRINTF_BUFFER_SIZE
];
325 for (i
= 0; i
< pat_len
; i
++) {
326 if (string_ord(interp
, pat
, i
) == '%') { /* % */
329 = string_substr(interp
, pat
, old
, len
, &substr
, 1);
331 /* XXX This shouldn't modify targ the pointer */
332 targ
= string_append(interp
, targ
, substr
);
336 if (string_ord(interp
, pat
, i
+ 1) == '%') {
337 /* skip this one, make next the first char
338 * of literal sequence, starting at old */
345 /* hoo boy, here we go... */
347 HUGEINTVAL sharedint
= 0;
349 /* Storage for flags, etc. */
350 SpfInfo info
= { 0, 0, 0, 0, (PHASE
)0 };
352 /* Reset temporaries */
355 /* This can be really hard to understand, so I'll try to explain beforehand.
356 * A rough grammar for a printf format is:
358 * grammar Parrot::PrintF_Format {
360 * <other_stuff> (<field> <other_stuff>)*
378 * + # prefix with a + if necessary
382 * \# # 0, 0x on octal, hex; force decimal point on float
387 * [\d|\*]+ # minimum width
391 * [\d|\*]+ # width on integers;
392 * # number of digits after decimal on floats;
393 * # maximum width on strings
398 * h # short (or float)
400 * H # HUGEwhateverVAL (long [long]?, [long]? double)
404 * S # Parrot string (only with %s)
415 * X # hex with capital X (if #)
417 * B # binary with capital B (if #)
418 * u # unsigned integer
432 * Complication: once upon a time, %P existed. Now you should
433 * use %Ps, %Pd or %Pf, but we still need to support the old form.
434 * The same is true of %S--%Ss is the best form, but %S is still
437 * The implementation of Parrot_vsprintf is surprisingly similar to this
438 * regex, even though the two were developed semi-independently.
439 * Parrot_vsprintf keeps track of what it expects to see next (the
440 * 'phase')--flags, width, precision, size, or field type (term). If it
441 * doesn't find a character that fits whatever it's expecting, it sets
442 * info.phase to the next thing and tries it. The first four phases just
443 * set flags--the last does all the work.
446 for (i
++; i
< pat_len
&& info
.phase
!= PHASE_DONE
; i
++) {
447 const INTVAL ch
= string_ord(interp
, pat
, i
);
449 switch (info
.phase
) {
450 /*@fallthrough@ */ case PHASE_FLAGS
:
453 info
.flags
|= FLAG_MINUS
;
457 info
.flags
|= FLAG_PLUS
;
461 info
.flags
|= FLAG_ZERO
;
465 info
.flags
|= FLAG_SPACE
;
469 info
.flags
|= FLAG_SHARP
;
473 info
.phase
= PHASE_WIDTH
;
477 /*@fallthrough@ */ case PHASE_WIDTH
:
489 info
.flags
|= FLAG_WIDTH
;
491 info
.width
+= ch
- '0';
495 info
.flags
|= FLAG_WIDTH
;
496 num
= obj
->getint(interp
, SIZE_XVAL
, obj
);
498 info
.flags
|= FLAG_MINUS
;
507 info
.phase
= PHASE_PREC
;
511 info
.phase
= PHASE_PREC
;
515 /*@fallthrough@ */ case PHASE_PREC
:
527 info
.flags
|= FLAG_PREC
;
529 info
.prec
+= ch
- '0';
533 info
.flags
|= FLAG_PREC
;
534 info
.prec
= (UINTVAL
)obj
->getint(interp
,
536 info
.phase
= PHASE_TYPE
;
540 info
.phase
= PHASE_TYPE
;
543 /*@fallthrough@ */ case PHASE_TYPE
:
546 info
.type
= SIZE_SHORT
;
550 info
.type
= SIZE_LONG
;
555 info
.type
= SIZE_HUGE
;
559 info
.type
= SIZE_XVAL
;
563 info
.type
= SIZE_OPCODE
;
567 info
.type
= SIZE_PMC
;
571 info
.type
= SIZE_PSTR
;
575 info
.phase
= PHASE_TERM
;
579 /*@fallthrough@ */ case PHASE_TERM
:
584 STRING
* const ts
= string_chr(interp
,
585 (UINTVAL
)obj
->getint(interp
, info
.type
, obj
));
586 targ
= str_append_w_flags(interp
, targ
, &info
, ts
, NULL
);
592 const UHUGEINTVAL theuint
=
593 obj
->getuint(interp
, info
.type
, obj
);
595 uint_to_str(interp
, tc
, theuint
, 8, 0);
596 STRING
* const prefix
= CONST_STRING(interp
, "0");
598 /* unsigned conversion - no plus */
599 info
.flags
&= ~FLAG_PLUS
;
600 targ
= str_append_w_flags(interp
, targ
,
607 const UHUGEINTVAL theuint
=
608 obj
->getuint(interp
, info
.type
, obj
);
610 uint_to_str(interp
, tc
, theuint
, 16, 0);
611 STRING
* const prefix
= CONST_STRING(interp
, "0x");
613 /* unsigned conversion - no plus */
614 info
.flags
&= ~FLAG_PLUS
;
615 targ
= str_append_w_flags(interp
, targ
,
622 STRING
* const prefix
= CONST_STRING(interp
, "0X");
623 const UHUGEINTVAL theuint
=
624 obj
->getuint(interp
, info
.type
, obj
);
626 uint_to_str(interp
, tc
, theuint
, 16, 0);
627 string_upcase_inplace(interp
, ts
);
629 /* unsigned conversion - no plus */
630 info
.flags
&= ~FLAG_PLUS
;
631 targ
= str_append_w_flags(interp
, targ
,
638 STRING
* const prefix
= CONST_STRING(interp
, "0b");
639 const UHUGEINTVAL theuint
=
640 obj
->getuint(interp
, info
.type
, obj
);
642 uint_to_str(interp
, tc
, theuint
, 2, 0);
644 /* unsigned conversion - no plus */
645 info
.flags
&= ~FLAG_PLUS
;
646 targ
= str_append_w_flags(interp
, targ
,
653 STRING
* const prefix
= CONST_STRING(interp
, "0B");
654 const HUGEINTVAL theint
=
655 obj
->getint(interp
, info
.type
, obj
);
657 int_to_str(interp
, tc
, theint
, 2);
659 /* unsigned conversion - no plus */
660 info
.flags
&= ~FLAG_PLUS
;
661 targ
= str_append_w_flags(interp
, targ
,
668 const UHUGEINTVAL theuint
=
669 obj
->getuint(interp
, info
.type
, obj
);
676 /* EVIL: Work around bug in glibc that makes %0lld
677 * sometimes output an empty string. */
678 if (!(info
.flags
& FLAG_WIDTH
))
679 info
.flags
&= ~FLAG_ZERO
;
681 sharedint
= obj
->getint(interp
, info
.type
, obj
);
685 gen_sprintf_call(tc
, &info
, ch
);
688 char * const tempstr
=
689 string_to_cstring(interp
, ts
);
691 #ifdef PARROT_HAS_SNPRINTF
692 snprintf(tc
, PARROT_SPRINTF_BUFFER_SIZE
,
695 /* the buffer is 4096, so no problem here */
696 sprintf(tc
, tempstr
, sharedint
);
698 string_cstring_free(tempstr
);
700 targ
= string_append(interp
, targ
, cstr2pstr(tc
));
706 STRING
* const prefix
= CONST_STRING(interp
, "0x");
707 const void * const ptr
=
708 obj
->getptr(interp
, info
.type
, obj
);
709 STRING
* const ts
= uint_to_str(interp
, tc
,
710 (HUGEINTVAL
) (size_t) ptr
, 16, 0);
712 targ
= str_append_w_flags(interp
, targ
, &info
,
717 /* FLOATS - We cheat on these and use snprintf. */
725 const HUGEFLOATVAL thefloat
=
726 obj
->getfloat(interp
, info
.type
, obj
);
728 /* turn -0.0 into 0.0 */
729 gen_sprintf_call(tc
, &info
, ch
);
732 /* XXX lost precision if %Hg or whatever */
734 char * const tempstr
=
735 string_to_cstring(interp
, ts
);
737 #ifdef PARROT_HAS_SNPRINTF
738 snprintf(tc
, PARROT_SPRINTF_BUFFER_SIZE
,
742 /* the buffer is 4096, so no problem here */
743 sprintf(tc
, tempstr
, (double)thefloat
);
745 string_cstring_free(tempstr
);
750 /* Microsoft defaults to three digits for
751 * exponents, even when fewer digits would suffice.
752 * For the sake of portability, we will here
753 * attempt to hide that. */
754 if (ch
== 'g' || ch
== 'G'
755 || ch
== 'e' || ch
== 'E') {
756 const size_t tclen
= strlen(tc
);
758 for (j
= 0; j
< tclen
; j
++) {
759 if ((tc
[j
] == 'e' || tc
[j
] == 'E')
760 && (tc
[j
+1] == '+' || tc
[j
+1] == '-')
762 && isdigit((unsigned char)tc
[j
+3])
763 && isdigit((unsigned char)tc
[j
+4]))
765 mem_sys_memmove(&tc
[j
+2], &tc
[j
+3],
768 /* now fix any broken length */
770 if ((info
.flags
& FLAG_WIDTH
)
771 && strlen(tc
) < info
.width
) {
772 if (info
.flags
& FLAG_MINUS
)
775 mem_sys_memmove(&tc
[1], &tc
[0],
777 tc
[0] = (info
.flags
& FLAG_ZERO
) ? '0' : ' ';
781 /* only one fix required per string */
788 targ
= string_append(interp
, targ
, cstr2pstr(tc
));
793 case 'r': /* Python repr */
794 /* XXX the right fix is to add a getrepr entry *
795 * to SPRINTF_OBJ, but for now, getstring_pmc *
796 * is inlined and modified to call get_repr */
797 if (obj
->getstring
== pmc_core
.getstring
) {
799 VTABLE_get_pmc_keyed_int(interp
,
803 STRING
*string
= (VTABLE_get_repr(interp
, tmp
));
804 STRING
*ts
= handle_flags(interp
, &info
,
808 targ
= string_append(interp
, targ
, ts
);
815 STRING
* const string
= obj
->getstring(interp
,
817 STRING
* const ts
= handle_flags(interp
, &info
,
819 targ
= string_append(interp
, targ
, ts
);
824 /* fake the old %P and %S commands */
825 if (info
.type
== SIZE_PMC
826 || info
.type
== SIZE_PSTR
) {
829 /* case 's' will see the SIZE_PMC or SIZE_PSTR
830 * and assume it was %Ps (or %Ss). Genius,
834 Parrot_ex_throw_from_c_args(interp
, NULL
,
835 EXCEPTION_INVALID_CHARACTER
,
836 "'%c' is not a valid sprintf format", ch
);
840 info
.phase
= PHASE_DONE
;
845 /* This is the terminating condition of the surrounding
848 PANIC(interp
, "We can't be here");
861 STRING
*ignored
= string_substr(interp
, pat
, old
, len
, &substr
, 1);
863 targ
= string_append(interp
, targ
, substr
);
875 F<src/misc.h>, F<src/misc.c>, F<src/spf_vtable.c>.
884 * c-file-style: "parrot"
886 * vim: expandtab shiftwidth=4: