2 Copyright (C) 2001-2009, Parrot Foundation.
7 src/spf_render.c - Parrot sprintf
11 Implements the main function that drives the C<Parrot_sprintf> family
12 and its utility functions.
14 =head2 Utility Functions
24 #include "parrot/parrot.h"
25 #include "spf_render.str"
36 typedef struct SpfInfo_tag
{
54 /* HEADERIZER HFILE: include/parrot/misc.h */
56 /* HEADERIZER BEGIN: static */
57 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
59 static void canonicalize_exponent(PARROT_INTERP
,
62 __attribute__nonnull__(1)
63 __attribute__nonnull__(2)
64 __attribute__nonnull__(3)
67 static void gen_sprintf_call(
69 ARGMOD(SpfInfo
*info
),
71 __attribute__nonnull__(1)
72 __attribute__nonnull__(2)
76 PARROT_WARN_UNUSED_RESULT
77 PARROT_CANNOT_RETURN_NULL
78 static STRING
* handle_flags(PARROT_INTERP
,
79 ARGIN(const SpfInfo
*info
),
82 ARGIN_NULLOK(STRING
* prefix
))
83 __attribute__nonnull__(1)
84 __attribute__nonnull__(2)
85 __attribute__nonnull__(3);
87 PARROT_CANNOT_RETURN_NULL
88 static STRING
* str_concat_w_flags(PARROT_INTERP
,
90 ARGIN(const SpfInfo
*info
),
92 ARGIN_NULLOK(STRING
*prefix
))
93 __attribute__nonnull__(1)
94 __attribute__nonnull__(2)
95 __attribute__nonnull__(3)
96 __attribute__nonnull__(4)
100 #define ASSERT_ARGS_canonicalize_exponent __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
101 PARROT_ASSERT_ARG(interp) \
102 , PARROT_ASSERT_ARG(tc) \
103 , PARROT_ASSERT_ARG(info))
104 #define ASSERT_ARGS_gen_sprintf_call __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
105 PARROT_ASSERT_ARG(out) \
106 , PARROT_ASSERT_ARG(info))
107 #define ASSERT_ARGS_handle_flags __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
108 PARROT_ASSERT_ARG(interp) \
109 , PARROT_ASSERT_ARG(info) \
110 , PARROT_ASSERT_ARG(str))
111 #define ASSERT_ARGS_str_concat_w_flags __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
112 PARROT_ASSERT_ARG(interp) \
113 , PARROT_ASSERT_ARG(dest) \
114 , PARROT_ASSERT_ARG(info) \
115 , PARROT_ASSERT_ARG(src))
116 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
117 /* HEADERIZER END: static */
120 /* Per Dan's orders, we will not use sprintf if snprintf isn't
124 # define snprintf _snprintf
129 =item C<static STRING * handle_flags(PARROT_INTERP, const SpfInfo *info, STRING
130 *str, INTVAL is_int_type, STRING* prefix)>
132 Handles C<+>, C<->, C<0>, C<#>, space, width, and prec.
/*
 * handle_flags: apply the sign, alternate-form, precision and width
 * settings carried in info to str.
 *
 *   interp      - interpreter handed to every string helper called here
 *   info        - parsed conversion spec; flags, prec and width are read
 *   str         - string to adjust; the local pointer is reassigned each
 *                 time something is chopped off or concatenated
 *   is_int_type - not referenced in the lines visible in this view
 *   prefix      - may be NULL; used only when FLAG_SHARP is set
 *
 * NOTE(review): this view is missing lines of the function (braces,
 * the declaration of temp, any updates of len after a chop, and the
 * return statement), so the nesting of the branches below should be
 * confirmed against the complete file.
 */
138 PARROT_WARN_UNUSED_RESULT
139 PARROT_CANNOT_RETURN_NULL
141 handle_flags(PARROT_INTERP
, ARGIN(const SpfInfo
*info
), ARGIN(STRING
*str
),
142 INTVAL is_int_type
, ARGIN_NULLOK(STRING
* prefix
))
144 ASSERT_ARGS(handle_flags
)
145 UINTVAL len
= Parrot_str_byte_length(interp
, str
);
/* Precision 0 with a leading zero digit: Parrot_str_chopn is asked to
 * take len off str. */
148 if (info
->flags
& FLAG_PREC
&& info
->prec
== 0 &&
150 string_ord(interp
, str
, 0) == '0') {
151 str
= Parrot_str_chopn(interp
, str
, len
);
/* Empty string or no leading minus sign: FLAG_PLUS prepends a plus
 * sign, otherwise FLAG_SPACE prepends a blank. */
155 if (!len
|| string_ord(interp
, str
, 0) != '-') {
156 if (info
->flags
& FLAG_PLUS
) {
157 STRING
* const cs
= CONST_STRING(interp
, "+");
158 str
= Parrot_str_concat(interp
, cs
, str
);
161 else if (info
->flags
& FLAG_SPACE
) {
162 STRING
* const cs
= CONST_STRING(interp
, " ");
163 str
= Parrot_str_concat(interp
, cs
, str
);
/* Alternate form: prepend the caller supplied prefix and add its byte
 * length to len. */
169 if ((info
->flags
& FLAG_SHARP
) && prefix
) {
170 str
= Parrot_str_concat(interp
, prefix
, str
);
171 len
+= Parrot_str_byte_length(interp
, prefix
);
173 /* XXX sharp + fill ??? */
176 /* string precision */
177 if (info
->flags
& FLAG_PREC
&& info
->prec
== 0) {
178 str
= Parrot_str_chopn(interp
, str
, len
);
181 else if (info
->flags
& FLAG_PREC
&& info
->prec
< len
) {
182 str
= Parrot_str_chopn(interp
, str
, -(INTVAL
)(info
->prec
));
/* Padding happens only when a width was given and it exceeds len.
 * The filler is a zero digit when FLAG_ZERO is set without FLAG_MINUS,
 * a blank otherwise, repeated width - len times. */
187 if ((info
->flags
& FLAG_WIDTH
) && info
->width
> len
) {
188 STRING
* const filler
=
189 ((info
->flags
& FLAG_ZERO
) && !(info
->flags
& FLAG_MINUS
))
190 ? CONST_STRING(interp
, "0")
191 : CONST_STRING(interp
, " ");
192 STRING
* const fill
= Parrot_str_repeat(interp
, filler
, info
->width
- len
);
194 if (info
->flags
& FLAG_MINUS
) { /* left-align */
195 str
= Parrot_str_concat(interp
, str
, fill
);
197 else { /* right-align */
198 /* signed and zero padded */
/* With zero padding and a leading sign the fill goes between the sign
 * and the rest: temp holds everything after the first character, str
 * is cut down (presumably to the sign alone - confirm the meaning of a
 * negative count for Parrot_str_chopn), then fill and temp are
 * appended in that order. */
199 if (info
->flags
& FLAG_ZERO
200 && (string_ord(interp
, str
, 0) == '-' ||
201 string_ord(interp
, str
, 0) == '+')) {
204 temp
= Parrot_str_substr(interp
, str
, 1, len
-1);
205 str
= Parrot_str_chopn(interp
, str
, -1);
206 str
= Parrot_str_concat(interp
, str
, fill
);
207 str
= Parrot_str_concat(interp
, str
, temp
);
/* Plain right alignment: fill first, then the string. */
210 str
= Parrot_str_concat(interp
, fill
, str
);
219 =item C<static STRING* str_concat_w_flags(PARROT_INTERP, STRING *dest, const
220 SpfInfo *info, STRING *src, STRING *prefix)>
222 Used by Parrot_sprintf_format. Prepends supplied prefix for numeric
223 values. (e.g. 0x for hex.)
225 Returns the pointer to the modified string.
/*
 * str_concat_w_flags: run src through handle_flags (with is_int_type
 * set to 1 and the given prefix) and concatenate the result onto dest.
 *
 *   interp - interpreter passed on to handle_flags and Parrot_str_concat
 *   dest   - string being built; the local pointer is reassigned to the
 *            concatenation result
 *   info   - conversion spec forwarded to handle_flags
 *   src    - freshly formatted value; replaced by the flag-adjusted form
 *   prefix - may be NULL; forwarded to handle_flags
 *
 * NOTE(review): dest is annotated ARGOUT although it is read as the
 * left operand of the concatenation; ARGMOD may be the intended
 * annotation. The return statement is not visible in this view.
 */
231 PARROT_CANNOT_RETURN_NULL
233 str_concat_w_flags(PARROT_INTERP
, ARGOUT(STRING
*dest
), ARGIN(const SpfInfo
*info
),
234 ARGMOD(STRING
*src
), ARGIN_NULLOK(STRING
*prefix
))
236 ASSERT_ARGS(str_concat_w_flags
)
237 src
= handle_flags(interp
, info
, src
, 1, prefix
);
238 dest
= Parrot_str_concat(interp
, dest
, src
);
244 =item C<static void gen_sprintf_call(char *out, SpfInfo *info, int thingy)>
246 Turn the info structure back into an sprintf format. Far from being
247 pointless, this is used to call C<snprintf()> when we're confronted with
/*
 * gen_sprintf_call: rebuild a C printf conversion specification from
 * info so that the native formatter can be used for conversion thingy.
 *
 *   out    - destination buffer for the format text; written through the
 *            cursor p
 *   info   - flags, width and prec are read; width and prec are clamped
 *            in place (hence ARGMOD)
 *   thingy - conversion character; d, i and u get the long long size
 *            treatment described at the end
 *
 * NOTE(review): the statements that emit the individual flag characters
 * and the declaration of p are not visible in this view.
 */
255 gen_sprintf_call(ARGOUT(char *out
), ARGMOD(SpfInfo
*info
), int thingy
)
257 ASSERT_ARGS(gen_sprintf_call
)
259 const int flags
= info
->flags
;
264 if (flags
& FLAG_MINUS
)
267 if (flags
& FLAG_PLUS
)
270 if (flags
& FLAG_ZERO
)
273 if (flags
& FLAG_SPACE
)
276 if (flags
& FLAG_SHARP
)
279 if (flags
& FLAG_WIDTH
) {
/* Clamp to one less than the buffer size: a field of width N needs
 * N + 1 bytes once the terminating NUL is counted, and the callers
 * format into a buffer of PARROT_SPRINTF_BUFFER_SIZE bytes. The former
 * clamp value (the full buffer size) still failed the test just above
 * and allowed a one byte overrun. */
280 if (info
->width
> PARROT_SPRINTF_BUFFER_SIZE
- 1)
281 info
->width
= PARROT_SPRINTF_BUFFER_SIZE
- 1;
283 p
+= sprintf(p
, "%u", (unsigned)info
->width
);
286 if (flags
& FLAG_PREC
) {
/* Precision is capped at PARROT_SPRINTF_MAX_PREC before being
 * written out. */
287 if (info
->prec
> PARROT_SPRINTF_MAX_PREC
)
288 info
->prec
= PARROT_SPRINTF_MAX_PREC
;
291 p
+= sprintf(p
, "%u", (unsigned)info
->prec
);
295 if (thingy
== 'd' || thingy
== 'i' ||thingy
== 'u') {
296 /* the u?int is a HUGEU?INTVAL aka long long
297 * the 'll' modifier is specced in susv3 - hopefully all our
298 * compilers support it too */
309 =item C<static void canonicalize_exponent(PARROT_INTERP, char *tc, SpfInfo
312 This function is called to canonicalize any exponent in a formatted
313 float. PARROT_SPRINTF_EXP_DIGITS specifies the standard number of
314 exponent digits that we want. Remember that the exponent has the
315 form '...Esddd ', where 's' is the sign, 'ddd' is some number of digits,
316 and there may be trailing spaces
/*
 * canonicalize_exponent: rewrite, in place, the exponent of the
 * formatted float held in tc so that surplus leading exponent zeroes
 * are closed up, then pad the text back out to info->width when
 * FLAG_WIDTH is set and the text became shorter than that.
 *
 *   interp - not used in the visible lines beyond the argument check
 *   tc     - NUL terminated buffer, modified in place
 *   info   - flags and width are read
 *
 * NOTE(review): the declarations of i, last_pos, non0_pos, sign_pos and
 * e_pos, the switch header and part of the keep expression are not
 * visible in this view.
 */
323 canonicalize_exponent(PARROT_INTERP
, ARGMOD(char *tc
), ARGIN(SpfInfo
*info
))
325 ASSERT_ARGS(canonicalize_exponent
)
327 const size_t exp_digits
= PARROT_SPRINTF_EXP_DIGITS
;
328 size_t len
= strlen(tc
),
/* NOTE(review): the loop below tests i >= 0, which can only become
 * false if i has a signed type - confirm against its declaration. The
 * scan also stops as soon as e_pos is set to a non-zero index. */
335 /* Scan the formatted number backward to find the positions of the
336 last digit, leftmost non-0 exponent digit, sign, and E. */
338 for (i
= len
-1; i
>= 0 && e_pos
== 0; --i
) {
340 case '1': case '2': case '3':
341 case '4': case '5': case '6':
342 case '7': case '8': case '9': non0_pos
= i
;
345 case '0': if (last_pos
== len
) last_pos
= i
;
348 case '+': case '-': sign_pos
= i
;
351 case 'E': case 'e': e_pos
= i
;
358 /* If there is an E, and it is followed by a sign, and there are
359 leading zeroes on the exponent, and there are more than the
360 standard number of exponent digits, then we have work to do. */
362 if (e_pos
!= 0 && sign_pos
== e_pos
+ 1 &&
363 non0_pos
> sign_pos
+ 1 &&
364 last_pos
- sign_pos
> exp_digits
) {
366 /* Close up to eliminate excess exponent digits and
367 adjust the length. Don't forget to move the NUL. */
369 size_t keep
= (last_pos
- non0_pos
+ 1 > exp_digits
)
371 : exp_digits
+ (len
- last_pos
- 1);
/* The last keep characters plus the NUL are moved to sit directly
 * after the sign; len is recomputed to match. */
373 mem_sys_memmove(&tc
[sign_pos
+1], &tc
[len
- keep
], keep
+1);
374 len
= sign_pos
+ 1 + keep
;
376 /* If it's a fixed-width field and we're too short now,
377 we have more work to do. If the field is left-justified,
378 pad the number on the right. Otherwise pad the number on
379 the left, possibly with leading zeroes. */
381 if ((info
->flags
& FLAG_WIDTH
) && len
< info
->width
) {
382 if (info
->flags
& FLAG_MINUS
) {
383 while (len
< info
->width
) {
/* Right-justified case: shift the text together with its NUL right by
 * width - len, then fill the gap with zero digits (FLAG_ZERO) or
 * blanks. The moved NUL ends up at index info->width, so tc must have
 * room for width + 1 bytes. */
390 mem_sys_memmove(&tc
[info
->width
- len
], &tc
[0], len
+1);
391 for (i
= 0; i
< info
->width
- len
; ++i
)
392 tc
[i
] = (info
->flags
& FLAG_ZERO
) ? '0' : ' ';
400 =item C<STRING * Parrot_sprintf_format(PARROT_INTERP, const STRING *pat,
403 This is the engine that does all the formatting.
/*
 * Parrot_sprintf_format: walk the pattern pat and build the formatted
 * result in targ.
 *
 *   interp - interpreter passed to every string helper and callback
 *   pat    - format pattern; scanned one character at a time with
 *            string_ord
 *   obj    - argument source; values are pulled through its getint,
 *            getuint, getfloat, getptr and getstring callbacks
 *
 * Literal stretches of the pattern are copied with Parrot_str_substr
 * and Parrot_str_concat. Each conversion is parsed by a small state
 * machine over info.phase (flags, width, precision, size, terminator).
 * A character that is not a valid conversion raises
 * EXCEPTION_INVALID_CHARACTER.
 *
 * NOTE(review): most case labels, several declarations (i, old, len,
 * num, ts, tmp) and the return statement are not visible in this view.
 */
409 PARROT_WARN_UNUSED_RESULT
410 PARROT_CANNOT_RETURN_NULL
412 Parrot_sprintf_format(PARROT_INTERP
, ARGIN(const STRING
*pat
), ARGMOD(SPRINTF_OBJ
*obj
))
414 ASSERT_ARGS(Parrot_sprintf_format
)
418 const INTVAL pat_len
= (INTVAL
)Parrot_str_byte_length(interp
, pat
);
421 /* start with a buffer; double the pattern length to avoid realloc #1 */
422 STRING
*targ
= Parrot_str_new_noinit(interp
, pat_len
* 2);
424 /* ts is used almost universally as an intermediate target;
425 * tc is used as a temporary buffer by Parrot_str_from_uint and
426 * as a target by gen_sprintf_call.
428 STRING
*substr
= NULL
;
429 char tc
[PARROT_SPRINTF_BUFFER_SIZE
];
431 for (i
= 0; i
< pat_len
; ++i
) {
432 if (string_ord(interp
, pat
, i
) == '%') { /* % */
434 substr
= Parrot_str_substr(interp
, pat
, old
, len
);
435 /* XXX This shouldn't modify targ the pointer */
436 targ
= Parrot_str_concat(interp
, targ
, substr
);
440 if (string_ord(interp
, pat
, i
+ 1) == '%') {
441 /* skip this one, make next the first char
442 * of literal sequence, starting at old */
449 /* hoo boy, here we go... */
451 HUGEINTVAL sharedint
= 0;
453 /* Storage for flags, etc. */
454 SpfInfo info
= { 0, 0, 0, 0, (PHASE
)0 };
456 /* Reset temporaries */
459 /* This can be really hard to understand, so I'll try to explain beforehand.
460 * A rough grammar for a printf format is:
462 * grammar Parrot::PrintF_Format {
464 * <other_stuff> (<field> <other_stuff>)*
482 * + # prefix with a + if necessary
486 * \# # 0, 0x on octal, hex; force decimal point on float
491 * [\d|\*]+ # minimum width
495 * [\d|\*]+ # width on integers;
496 * # number of digits after decimal on floats;
497 * # maximum width on strings
502 * h # short (or float)
504 * H # HUGEwhateverVAL (long [long]?, [long]? double)
508 * S # Parrot string (only with %s)
519 * X # hex with capital X (if #)
521 * B # binary with capital B (if #)
522 * u # unsigned integer
536 * Complication: once upon a time, %P existed. Now you should
537 * use %Ps, %Pd or %Pf, but we still need to support the old form.
538 * The same is true of %S--%Ss is the best form, but %S is still
541 * The implementation of Parrot_vsprintf is surprisingly similar to this
542 * regex, even though the two were developed semi-independently.
543 * Parrot_vsprintf keeps track of what it expects to see next (the
544 * 'phase')--flags, width, precision, size, or field type (term). If it
545 * doesn't find a character that fits whatever it's expecting, it sets
546 * info.phase to the next thing and tries it. The first four phases just
547 * set flags--the last does all the work.
550 for (++i
; i
< pat_len
&& info
.phase
!= PHASE_DONE
; ++i
) {
551 const INTVAL ch
= string_ord(interp
, pat
, i
);
553 switch (info
.phase
) {
554 /*@fallthrough@ */ case PHASE_FLAGS
:
/* The assignments below each OR one flag bit into info.flags; the
 * last one advances the state machine to PHASE_WIDTH. */
557 info
.flags
|= FLAG_MINUS
;
561 info
.flags
|= FLAG_PLUS
;
565 info
.flags
|= FLAG_ZERO
;
569 info
.flags
|= FLAG_SPACE
;
573 info
.flags
|= FLAG_SHARP
;
577 info
.phase
= PHASE_WIDTH
;
581 /*@fallthrough@ */ case PHASE_WIDTH
:
593 info
.flags
|= FLAG_WIDTH
;
595 info
.width
+= ch
- '0';
599 info
.flags
|= FLAG_WIDTH
;
600 num
= obj
->getint(interp
, SIZE_XVAL
, obj
);
602 info
.flags
|= FLAG_MINUS
;
611 info
.phase
= PHASE_PREC
;
615 info
.phase
= PHASE_PREC
;
619 /*@fallthrough@ */ case PHASE_PREC
:
631 info
.flags
|= FLAG_PREC
;
633 info
.prec
+= ch
- '0';
637 info
.flags
|= FLAG_PREC
;
638 info
.prec
= (UINTVAL
)obj
->getint(interp
,
640 info
.phase
= PHASE_TYPE
;
644 info
.phase
= PHASE_TYPE
;
647 /*@fallthrough@ */ case PHASE_TYPE
:
650 info
.type
= SIZE_SHORT
;
654 info
.type
= SIZE_LONG
;
659 info
.type
= SIZE_HUGE
;
663 info
.type
= SIZE_XVAL
;
667 info
.type
= SIZE_OPCODE
;
671 info
.type
= SIZE_PMC
;
675 info
.type
= SIZE_PSTR
;
679 info
.phase
= PHASE_TERM
;
683 /*@fallthrough@ */ case PHASE_TERM
:
688 STRING
* const ts
= string_chr(interp
,
689 (UINTVAL
)obj
->getint(interp
, info
.type
, obj
));
690 targ
= str_concat_w_flags(interp
, targ
, &info
, ts
, NULL
);
696 const UHUGEINTVAL theuint
=
697 obj
->getuint(interp
, info
.type
, obj
);
699 Parrot_str_from_uint(interp
, tc
, theuint
, 8, 0);
700 STRING
* const prefix
= CONST_STRING(interp
, "0");
702 /* unsigned conversion - no plus */
703 info
.flags
&= ~FLAG_PLUS
;
704 targ
= str_concat_w_flags(interp
, targ
,
711 const UHUGEINTVAL theuint
=
712 obj
->getuint(interp
, info
.type
, obj
);
714 Parrot_str_from_uint(interp
, tc
, theuint
, 16, 0);
715 STRING
* const prefix
= CONST_STRING(interp
, "0x");
717 /* unsigned conversion - no plus */
718 info
.flags
&= ~FLAG_PLUS
;
719 targ
= str_concat_w_flags(interp
, targ
,
726 STRING
* const prefix
= CONST_STRING(interp
, "0X");
727 const UHUGEINTVAL theuint
=
728 obj
->getuint(interp
, info
.type
, obj
);
730 Parrot_str_from_uint(interp
, tc
, theuint
, 16, 0);
731 ts
= Parrot_str_upcase(interp
, ts
);
733 /* unsigned conversion - no plus */
734 info
.flags
&= ~FLAG_PLUS
;
735 targ
= str_concat_w_flags(interp
, targ
,
742 STRING
* const prefix
= CONST_STRING(interp
, "0b");
743 const UHUGEINTVAL theuint
=
744 obj
->getuint(interp
, info
.type
, obj
);
746 Parrot_str_from_uint(interp
, tc
, theuint
, 2, 0);
748 /* unsigned conversion - no plus */
749 info
.flags
&= ~FLAG_PLUS
;
750 targ
= str_concat_w_flags(interp
, targ
,
757 STRING
* const prefix
= CONST_STRING(interp
, "0B");
758 const HUGEINTVAL theint
=
759 obj
->getint(interp
, info
.type
, obj
);
761 Parrot_str_from_int_base(interp
, tc
, theint
, 2);
763 /* unsigned conversion - no plus */
764 info
.flags
&= ~FLAG_PLUS
;
765 targ
= str_concat_w_flags(interp
, targ
,
772 const UHUGEINTVAL theuint
=
773 obj
->getuint(interp
, info
.type
, obj
);
780 /* EVIL: Work around bug in glibc that makes %0lld
781 * sometimes output an empty string. */
782 if (!(info
.flags
& FLAG_WIDTH
))
783 info
.flags
&= ~FLAG_ZERO
;
785 sharedint
= obj
->getint(interp
, info
.type
, obj
);
/* The conversion is turned back into a C format string in tc by
 * gen_sprintf_call; sharedint is then formatted through the C string
 * copy of ts (presumably built from tc on a line not visible here) and
 * the output, again in tc, is appended to targ. */
789 gen_sprintf_call(tc
, &info
, ch
);
792 char * const tempstr
=
793 Parrot_str_to_cstring(interp
, ts
);
795 #ifdef PARROT_HAS_SNPRINTF
796 snprintf(tc
, PARROT_SPRINTF_BUFFER_SIZE
,
799 /* the buffer is 4096, so no problem here */
800 sprintf(tc
, tempstr
, sharedint
);
802 Parrot_str_free_cstring(tempstr
);
804 targ
= Parrot_str_concat(interp
, targ
, cstr2pstr(tc
));
810 STRING
* const prefix
= CONST_STRING(interp
, "0x");
811 const void * const ptr
=
812 obj
->getptr(interp
, info
.type
, obj
);
813 STRING
* const ts
= Parrot_str_from_uint(interp
, tc
,
814 (UHUGEINTVAL
) (size_t) ptr
, 16, 0);
816 targ
= str_concat_w_flags(interp
, targ
, &info
,
821 /* FLOATS - We cheat on these and use snprintf. */
829 const HUGEFLOATVAL thefloat
=
830 obj
->getfloat(interp
, info
.type
, obj
);
832 /* check for Inf and NaN values */
833 if (thefloat
== PARROT_FLOATVAL_INF_POSITIVE
) {
834 ts
= cstr2pstr(PARROT_CSTRING_INF_POSITIVE
);
836 else if (thefloat
== PARROT_FLOATVAL_INF_NEGATIVE
) {
837 ts
= cstr2pstr(PARROT_CSTRING_INF_NEGATIVE
);
839 else if (thefloat
!= thefloat
) {
840 ts
= cstr2pstr(PARROT_CSTRING_NAN_QUIET
);
843 /* turn -0.0 into 0.0 */
844 gen_sprintf_call(tc
, &info
, ch
);
848 /* XXX lost precision if %Hg or whatever */
850 char * const tempstr
=
851 Parrot_str_to_cstring(interp
, ts
);
853 #ifdef PARROT_HAS_SNPRINTF
854 snprintf(tc
, PARROT_SPRINTF_BUFFER_SIZE
,
858 /* the buffer is 4096, so no problem here */
859 sprintf(tc
, tempstr
, (double)thefloat
);
861 Parrot_str_free_cstring(tempstr
);
864 if (ch
== 'e' || ch
== 'E' ||
865 ch
== 'g' || ch
== 'G')
866 canonicalize_exponent(interp
, tc
, &info
);
868 targ
= Parrot_str_concat(interp
, targ
, cstr2pstr(tc
));
873 case 'r': /* Python repr */
874 /* XXX the right fix is to add a getrepr entry *
875 * to SPRINTF_OBJ, but for now, getstring_pmc *
876 * is inlined and modified to call get_repr */
877 if (obj
->getstring
== pmc_core
.getstring
) {
879 VTABLE_get_pmc_keyed_int(interp
,
883 STRING
* const string
= (VTABLE_get_repr(interp
, tmp
));
884 STRING
* const ts
= handle_flags(interp
, &info
,
888 targ
= Parrot_str_concat(interp
, targ
, ts
);
895 STRING
* const string
= obj
->getstring(interp
,
897 /* XXX Silently ignore? */
898 if (!STRING_IS_NULL(string
)) {
899 STRING
* const ts
= handle_flags(interp
,
900 &info
, string
, 0, NULL
);
901 targ
= Parrot_str_concat(interp
, targ
, ts
);
907 /* fake the old %P and %S commands */
908 if (info
.type
== SIZE_PMC
909 || info
.type
== SIZE_PSTR
) {
912 /* case 's' will see the SIZE_PMC or SIZE_PSTR
913 * and assume it was %Ps (or %Ss). Genius,
917 Parrot_ex_throw_from_c_args(interp
, NULL
,
918 EXCEPTION_INVALID_CHARACTER
,
919 "'%c' is not a valid sprintf format", ch
);
923 info
.phase
= PHASE_DONE
;
928 /* This is the terminating condition of the surrounding
931 PANIC(interp
, "We can't be here");
944 substr
= Parrot_str_substr(interp
, pat
, old
, len
);
945 targ
= Parrot_str_concat(interp
, targ
, substr
);
957 F<src/misc.h>, F<src/misc.c>, F<src/spf_vtable.c>.
966 * c-file-style: "parrot"
968 * vim: expandtab shiftwidth=4: