YJIT: implement cache for recently encoded/decoded contexts (#10938)
[ruby.git] / pack.c
blob1b0d66f9900995e6fd5063f4835fb7daa08802c8
1 /**********************************************************************
3 pack.c -
5 $Author$
6 created at: Thu Feb 10 15:17:05 JST 1994
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
10 **********************************************************************/
12 #include "ruby/internal/config.h"
14 #include <ctype.h>
15 #include <errno.h>
16 #include <float.h>
17 #include <sys/types.h>
19 #include "internal.h"
20 #include "internal/array.h"
21 #include "internal/bits.h"
22 #include "internal/string.h"
23 #include "internal/symbol.h"
24 #include "internal/variable.h"
25 #include "ruby/util.h"
27 #include "builtin.h"
/*
 * The set of directives accepting the native-size modifier is keyed on
 * HAVE_TRUE_LONG_LONG on purpose, not on HAVE_LONG_LONG or LONG_LONG.
 * As a result q! and Q! always denote the standard long long type and
 * cause ArgumentError on platforms which have no long long type,
 * even if the platform has an implementation specific 64bit type.
 * This behavior is consistent with the document of pack/unpack.
 *
 *   natstr - directives that may be followed by '_' or '!'
 *   endstr - directives that may be followed by '<' or '>'
 */
#if defined(HAVE_TRUE_LONG_LONG)
static const char natstr[] = "sSiIlLqQjJ";
# define endstr natstr
/* It is intentional to use long long instead of LONG_LONG. */
# define NATINT_LEN_Q NATINT_LEN(long long, 8)
#else
static const char natstr[] = "sSiIlLjJ";
static const char endstr[] = "sSiIlLqQjJ";
# define NATINT_LEN_Q 8
#endif

/*
 * NATINT_PACK is defined when at least one native integer type differs
 * in size from the fixed width of its directive.
 */
#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
# define NATINT_PACK
#endif
#if defined(DYNAMIC_ENDIAN)
/* for universal binary of NEXTSTEP and MacOS X */
/* useless since autoconf 2.63? */
/*
 * Run-time byte-order probe.  Returns 1 on a big-endian host and 0
 * otherwise.  The answer is computed on the first call and kept in
 * function-local statics for later calls.
 */
static int
is_bigendian(void)
{
    static int probed = 0;
    static int cached_answer;
    const char *lowest_byte;

    if (probed) {
        return cached_answer;
    }
    /* Store 1, then see whether it landed in the lowest-addressed byte. */
    probed = 1;
    lowest_byte = (const char *)&probed;
    cached_answer = (lowest_byte[0] != 0) ? 0 : 1;
    return cached_answer;
}
# define BIGENDIAN_P() (is_bigendian())
#elif defined(WORDS_BIGENDIAN)
# define BIGENDIAN_P() 1
#else
# define BIGENDIAN_P() 0
#endif
/*
 * NATINT_LEN(type, len): byte width used for an integer directive.
 * With NATINT_PACK defined, the width is sizeof(type) when the local
 * `natint` flag is set and the fixed `len` otherwise.  Without it the
 * width is always sizeof(type).
 */
#if defined(NATINT_PACK)
# define NATINT_LEN(type, len) (natint ? (int)sizeof(type) : (int)(len))
#else
# define NATINT_LEN(type, len) ((int)sizeof(type))
#endif
/* A float overlaid with its 32-bit integer image and its raw bytes. */
typedef union {
    float f;
    uint32_t u;
    char buf[4];
} FLOAT_SWAPPER;

/* A double overlaid with its 64-bit integer image and its raw bytes. */
typedef union {
    double d;
    uint64_t u;
    char buf[8];
} DOUBLE_SWAPPER;

/* Byte reversal of the integer images, delegated to swap32/swap64. */
#define swapf(x) swap32(x)
#define swapd(x) swap64(x)

/*
 * Host <-> network order (n) and host <-> VAX order (v) conversions of
 * the integer images.  The network forms swap only when the host is not
 * big-endian; the VAX forms swap only when it is.
 */
#define rb_ntohf(x) (BIGENDIAN_P() ? (x) : swapf(x))
#define rb_ntohd(x) (BIGENDIAN_P() ? (x) : swapd(x))
#define rb_htonf(x) (BIGENDIAN_P() ? (x) : swapf(x))
#define rb_htond(x) (BIGENDIAN_P() ? (x) : swapd(x))
#define rb_htovf(x) (BIGENDIAN_P() ? swapf(x) : (x))
#define rb_htovd(x) (BIGENDIAN_P() ? swapd(x) : (x))
#define rb_vtohf(x) (BIGENDIAN_P() ? swapf(x) : (x))
#define rb_vtohd(x) (BIGENDIAN_P() ? swapd(x) : (x))

/*
 * FLOAT_CONVWITH declares a FLOAT_SWAPPER named x (the macro supplies
 * its own semicolon); the remaining macros convert x in place through
 * its integer member.
 */
#define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
#define HTONF(x) ((x).u = rb_htonf((x).u))
#define HTOVF(x) ((x).u = rb_htovf((x).u))
#define NTOHF(x) ((x).u = rb_ntohf((x).u))
#define VTOHF(x) ((x).u = rb_vtohf((x).u))

/* Same as above for DOUBLE_SWAPPER. */
#define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
#define HTOND(x) ((x).u = rb_htond((x).u))
#define HTOVD(x) ((x).u = rb_htovd((x).u))
#define NTOHD(x) ((x).u = rb_ntohd((x).u))
#define VTOHD(x) ((x).u = rb_vtohd((x).u))
118 #define MAX_INTEGER_PACK_SIZE 8
120 static const char toofew[] = "too few arguments";
122 static void encodes(VALUE,const char*,long,int,int);
123 static void qpencode(VALUE,VALUE,long);
125 static unsigned long utf8_to_uv(const char*,long*);
127 static ID id_associated;
129 static void
130 str_associate(VALUE str, VALUE add)
132 /* assert(NIL_P(rb_attr_get(str, id_associated))); */
133 rb_ivar_set(str, id_associated, add);
136 static VALUE
137 str_associated(VALUE str)
139 VALUE associates = rb_ivar_lookup(str, id_associated, Qfalse);
140 if (!associates)
141 rb_raise(rb_eArgError, "no associated pointer");
142 return associates;
145 static VALUE
146 associated_pointer(VALUE associates, const char *t)
148 const VALUE *p = RARRAY_CONST_PTR(associates);
149 const VALUE *pend = p + RARRAY_LEN(associates);
150 for (; p < pend; p++) {
151 VALUE tmp = *p;
152 if (RB_TYPE_P(tmp, T_STRING) && RSTRING_PTR(tmp) == t) return tmp;
154 rb_raise(rb_eArgError, "non associated pointer");
155 UNREACHABLE_RETURN(Qnil);
158 RBIMPL_ATTR_NORETURN()
159 static void
160 unknown_directive(const char *mode, char type, VALUE fmt)
162 char unknown[5];
164 if (ISPRINT(type)) {
165 unknown[0] = type;
166 unknown[1] = '\0';
168 else {
169 snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
171 fmt = rb_str_quote_unprintable(fmt);
172 rb_raise(rb_eArgError, "unknown %s directive '%s' in '%"PRIsVALUE"'",
173 mode, unknown, fmt);
176 static float
177 VALUE_to_float(VALUE obj)
179 VALUE v = rb_to_float(obj);
180 double d = RFLOAT_VALUE(v);
182 if (isnan(d)) {
183 return NAN;
185 else if (d < -FLT_MAX) {
186 return -INFINITY;
188 else if (d <= FLT_MAX) {
189 return d;
191 else {
192 return INFINITY;
196 static VALUE
197 pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
199 static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
200 static const char spc10[] = " ";
201 const char *p, *pend;
202 VALUE res, from, associates = 0;
203 char type;
204 long len, idx, plen;
205 const char *ptr;
206 int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
207 #ifdef NATINT_PACK
208 int natint; /* native integer */
209 #endif
210 int integer_size, bigendian_p;
212 StringValue(fmt);
213 rb_must_asciicompat(fmt);
214 p = RSTRING_PTR(fmt);
215 pend = p + RSTRING_LEN(fmt);
217 if (NIL_P(buffer)) {
218 res = rb_str_buf_new(0);
220 else {
221 if (!RB_TYPE_P(buffer, T_STRING))
222 rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer));
223 rb_str_modify(buffer);
224 res = buffer;
227 idx = 0;
229 #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
230 #define MORE_ITEM (idx < RARRAY_LEN(ary))
231 #define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
232 #define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
234 while (p < pend) {
235 int explicit_endian = 0;
236 if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
237 rb_raise(rb_eRuntimeError, "format string modified");
239 type = *p++; /* get data type */
240 #ifdef NATINT_PACK
241 natint = 0;
242 #endif
244 if (ISSPACE(type)) continue;
245 if (type == '#') {
246 while ((p < pend) && (*p != '\n')) {
247 p++;
249 continue;
253 modifiers:
254 switch (*p) {
255 case '_':
256 case '!':
257 if (strchr(natstr, type)) {
258 #ifdef NATINT_PACK
259 natint = 1;
260 #endif
261 p++;
263 else {
264 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
266 goto modifiers;
268 case '<':
269 case '>':
270 if (!strchr(endstr, type)) {
271 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
273 if (explicit_endian) {
274 rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
276 explicit_endian = *p++;
277 goto modifiers;
281 if (*p == '*') { /* set data length */
282 len = strchr("@Xxu", type) ? 0
283 : strchr("PMm", type) ? 1
284 : RARRAY_LEN(ary) - idx;
285 p++;
287 else if (ISDIGIT(*p)) {
288 errno = 0;
289 len = STRTOUL(p, (char**)&p, 10);
290 if (errno) {
291 rb_raise(rb_eRangeError, "pack length too big");
294 else {
295 len = 1;
298 switch (type) {
299 case 'U':
300 /* if encoding is US-ASCII, upgrade to UTF-8 */
301 if (enc_info == 1) enc_info = 2;
302 break;
303 case 'm': case 'M': case 'u':
304 /* keep US-ASCII (do nothing) */
305 break;
306 default:
307 /* fall back to BINARY */
308 enc_info = 0;
309 break;
311 switch (type) {
312 case 'A': case 'a': case 'Z':
313 case 'B': case 'b':
314 case 'H': case 'h':
315 from = NEXTFROM;
316 if (NIL_P(from)) {
317 ptr = "";
318 plen = 0;
320 else {
321 StringValue(from);
322 ptr = RSTRING_PTR(from);
323 plen = RSTRING_LEN(from);
326 if (p[-1] == '*')
327 len = plen;
329 switch (type) {
330 case 'a': /* arbitrary binary string (null padded) */
331 case 'A': /* arbitrary binary string (ASCII space padded) */
332 case 'Z': /* null terminated string */
333 if (plen >= len) {
334 rb_str_buf_cat(res, ptr, len);
335 if (p[-1] == '*' && type == 'Z')
336 rb_str_buf_cat(res, nul10, 1);
338 else {
339 rb_str_buf_cat(res, ptr, plen);
340 len -= plen;
341 while (len >= 10) {
342 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
343 len -= 10;
345 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
347 break;
349 #define castchar(from) (char)((from) & 0xff)
351 case 'b': /* bit string (ascending) */
353 int byte = 0;
354 long i, j = 0;
356 if (len > plen) {
357 j = (len - plen + 1)/2;
358 len = plen;
360 for (i=0; i++ < len; ptr++) {
361 if (*ptr & 1)
362 byte |= 128;
363 if (i & 7)
364 byte >>= 1;
365 else {
366 char c = castchar(byte);
367 rb_str_buf_cat(res, &c, 1);
368 byte = 0;
371 if (len & 7) {
372 char c;
373 byte >>= 7 - (len & 7);
374 c = castchar(byte);
375 rb_str_buf_cat(res, &c, 1);
377 len = j;
378 goto grow;
380 break;
382 case 'B': /* bit string (descending) */
384 int byte = 0;
385 long i, j = 0;
387 if (len > plen) {
388 j = (len - plen + 1)/2;
389 len = plen;
391 for (i=0; i++ < len; ptr++) {
392 byte |= *ptr & 1;
393 if (i & 7)
394 byte <<= 1;
395 else {
396 char c = castchar(byte);
397 rb_str_buf_cat(res, &c, 1);
398 byte = 0;
401 if (len & 7) {
402 char c;
403 byte <<= 7 - (len & 7);
404 c = castchar(byte);
405 rb_str_buf_cat(res, &c, 1);
407 len = j;
408 goto grow;
410 break;
412 case 'h': /* hex string (low nibble first) */
414 int byte = 0;
415 long i, j = 0;
417 if (len > plen) {
418 j = (len + 1) / 2 - (plen + 1) / 2;
419 len = plen;
421 for (i=0; i++ < len; ptr++) {
422 if (ISALPHA(*ptr))
423 byte |= (((*ptr & 15) + 9) & 15) << 4;
424 else
425 byte |= (*ptr & 15) << 4;
426 if (i & 1)
427 byte >>= 4;
428 else {
429 char c = castchar(byte);
430 rb_str_buf_cat(res, &c, 1);
431 byte = 0;
434 if (len & 1) {
435 char c = castchar(byte);
436 rb_str_buf_cat(res, &c, 1);
438 len = j;
439 goto grow;
441 break;
443 case 'H': /* hex string (high nibble first) */
445 int byte = 0;
446 long i, j = 0;
448 if (len > plen) {
449 j = (len + 1) / 2 - (plen + 1) / 2;
450 len = plen;
452 for (i=0; i++ < len; ptr++) {
453 if (ISALPHA(*ptr))
454 byte |= ((*ptr & 15) + 9) & 15;
455 else
456 byte |= *ptr & 15;
457 if (i & 1)
458 byte <<= 4;
459 else {
460 char c = castchar(byte);
461 rb_str_buf_cat(res, &c, 1);
462 byte = 0;
465 if (len & 1) {
466 char c = castchar(byte);
467 rb_str_buf_cat(res, &c, 1);
469 len = j;
470 goto grow;
472 break;
474 break;
476 case 'c': /* signed char */
477 case 'C': /* unsigned char */
478 integer_size = 1;
479 bigendian_p = BIGENDIAN_P(); /* not effective */
480 goto pack_integer;
482 case 's': /* s for int16_t, s! for signed short */
483 case 'S': /* S for uint16_t, S! for unsigned short */
484 integer_size = NATINT_LEN(short, 2);
485 bigendian_p = BIGENDIAN_P();
486 goto pack_integer;
488 case 'i': /* i and i! for signed int */
489 case 'I': /* I and I! for unsigned int */
490 integer_size = (int)sizeof(int);
491 bigendian_p = BIGENDIAN_P();
492 goto pack_integer;
494 case 'l': /* l for int32_t, l! for signed long */
495 case 'L': /* L for uint32_t, L! for unsigned long */
496 integer_size = NATINT_LEN(long, 4);
497 bigendian_p = BIGENDIAN_P();
498 goto pack_integer;
500 case 'q': /* q for int64_t, q! for signed long long */
501 case 'Q': /* Q for uint64_t, Q! for unsigned long long */
502 integer_size = NATINT_LEN_Q;
503 bigendian_p = BIGENDIAN_P();
504 goto pack_integer;
506 case 'j': /* j for intptr_t */
507 integer_size = sizeof(intptr_t);
508 bigendian_p = BIGENDIAN_P();
509 goto pack_integer;
511 case 'J': /* J for uintptr_t */
512 integer_size = sizeof(uintptr_t);
513 bigendian_p = BIGENDIAN_P();
514 goto pack_integer;
516 case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
517 integer_size = 2;
518 bigendian_p = 1;
519 goto pack_integer;
521 case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
522 integer_size = 4;
523 bigendian_p = 1;
524 goto pack_integer;
526 case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
527 integer_size = 2;
528 bigendian_p = 0;
529 goto pack_integer;
531 case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
532 integer_size = 4;
533 bigendian_p = 0;
534 goto pack_integer;
536 pack_integer:
537 if (explicit_endian) {
538 bigendian_p = explicit_endian == '>';
540 if (integer_size > MAX_INTEGER_PACK_SIZE)
541 rb_bug("unexpected integer size for pack: %d", integer_size);
542 while (len-- > 0) {
543 char intbuf[MAX_INTEGER_PACK_SIZE];
545 from = NEXTFROM;
546 rb_integer_pack(from, intbuf, integer_size, 1, 0,
547 INTEGER_PACK_2COMP |
548 (bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN));
549 rb_str_buf_cat(res, intbuf, integer_size);
551 break;
553 case 'f': /* single precision float in native format */
554 case 'F': /* ditto */
555 while (len-- > 0) {
556 float f;
558 from = NEXTFROM;
559 f = VALUE_to_float(from);
560 rb_str_buf_cat(res, (char*)&f, sizeof(float));
562 break;
564 case 'e': /* single precision float in VAX byte-order */
565 while (len-- > 0) {
566 FLOAT_CONVWITH(tmp);
568 from = NEXTFROM;
569 tmp.f = VALUE_to_float(from);
570 HTOVF(tmp);
571 rb_str_buf_cat(res, tmp.buf, sizeof(float));
573 break;
575 case 'E': /* double precision float in VAX byte-order */
576 while (len-- > 0) {
577 DOUBLE_CONVWITH(tmp);
578 from = NEXTFROM;
579 tmp.d = RFLOAT_VALUE(rb_to_float(from));
580 HTOVD(tmp);
581 rb_str_buf_cat(res, tmp.buf, sizeof(double));
583 break;
585 case 'd': /* double precision float in native format */
586 case 'D': /* ditto */
587 while (len-- > 0) {
588 double d;
590 from = NEXTFROM;
591 d = RFLOAT_VALUE(rb_to_float(from));
592 rb_str_buf_cat(res, (char*)&d, sizeof(double));
594 break;
596 case 'g': /* single precision float in network byte-order */
597 while (len-- > 0) {
598 FLOAT_CONVWITH(tmp);
599 from = NEXTFROM;
600 tmp.f = VALUE_to_float(from);
601 HTONF(tmp);
602 rb_str_buf_cat(res, tmp.buf, sizeof(float));
604 break;
606 case 'G': /* double precision float in network byte-order */
607 while (len-- > 0) {
608 DOUBLE_CONVWITH(tmp);
610 from = NEXTFROM;
611 tmp.d = RFLOAT_VALUE(rb_to_float(from));
612 HTOND(tmp);
613 rb_str_buf_cat(res, tmp.buf, sizeof(double));
615 break;
617 case 'x': /* null byte */
618 grow:
619 while (len >= 10) {
620 rb_str_buf_cat(res, nul10, 10);
621 len -= 10;
623 rb_str_buf_cat(res, nul10, len);
624 break;
626 case 'X': /* back up byte */
627 shrink:
628 plen = RSTRING_LEN(res);
629 if (plen < len)
630 rb_raise(rb_eArgError, "X outside of string");
631 rb_str_set_len(res, plen - len);
632 break;
634 case '@': /* null fill to absolute position */
635 len -= RSTRING_LEN(res);
636 if (len > 0) goto grow;
637 len = -len;
638 if (len > 0) goto shrink;
639 break;
641 case '%':
642 rb_raise(rb_eArgError, "%% is not supported");
643 break;
645 case 'U': /* Unicode character */
646 while (len-- > 0) {
647 SIGNED_VALUE l;
648 char buf[8];
649 int le;
651 from = NEXTFROM;
652 from = rb_to_int(from);
653 l = NUM2LONG(from);
654 if (l < 0) {
655 rb_raise(rb_eRangeError, "pack(U): value out of range");
657 le = rb_uv_to_utf8(buf, l);
658 rb_str_buf_cat(res, (char*)buf, le);
660 break;
662 case 'u': /* uuencoded string */
663 case 'm': /* base64 encoded string */
664 from = NEXTFROM;
665 StringValue(from);
666 ptr = RSTRING_PTR(from);
667 plen = RSTRING_LEN(from);
669 if (len == 0 && type == 'm') {
670 encodes(res, ptr, plen, type, 0);
671 ptr += plen;
672 break;
674 if (len <= 2)
675 len = 45;
676 else if (len > 63 && type == 'u')
677 len = 63;
678 else
679 len = len / 3 * 3;
680 while (plen > 0) {
681 long todo;
683 if (plen > len)
684 todo = len;
685 else
686 todo = plen;
687 encodes(res, ptr, todo, type, 1);
688 plen -= todo;
689 ptr += todo;
691 break;
693 case 'M': /* quoted-printable encoded string */
694 from = rb_obj_as_string(NEXTFROM);
695 if (len <= 1)
696 len = 72;
697 qpencode(res, from, len);
698 break;
700 case 'P': /* pointer to packed byte string */
701 from = THISFROM;
702 if (!NIL_P(from)) {
703 StringValue(from);
704 if (RSTRING_LEN(from) < len) {
705 rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
706 RSTRING_LEN(from), len);
709 len = 1;
710 /* FALL THROUGH */
711 case 'p': /* pointer to string */
712 while (len-- > 0) {
713 char *t;
714 from = NEXTFROM;
715 if (NIL_P(from)) {
716 t = 0;
718 else {
719 t = StringValuePtr(from);
721 if (!associates) {
722 associates = rb_ary_new();
724 rb_ary_push(associates, from);
725 rb_str_buf_cat(res, (char*)&t, sizeof(char*));
727 break;
729 case 'w': /* BER compressed integer */
730 while (len-- > 0) {
731 VALUE buf = rb_str_new(0, 0);
732 size_t numbytes;
733 int sign;
734 char *cp;
736 from = NEXTFROM;
737 from = rb_to_int(from);
738 numbytes = rb_absint_numwords(from, 7, NULL);
739 if (numbytes == 0)
740 numbytes = 1;
741 buf = rb_str_new(NULL, numbytes);
743 sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN);
745 if (sign < 0)
746 rb_raise(rb_eArgError, "can't compress negative numbers");
747 if (sign == 2)
748 rb_bug("buffer size problem?");
750 cp = RSTRING_PTR(buf);
751 while (1 < numbytes) {
752 *cp |= 0x80;
753 cp++;
754 numbytes--;
757 rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
759 break;
761 default: {
762 unknown_directive("pack", type, fmt);
763 break;
768 if (associates) {
769 str_associate(res, associates);
771 switch (enc_info) {
772 case 1:
773 ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
774 break;
775 case 2:
776 rb_enc_set_index(res, rb_utf8_encindex());
777 break;
778 default:
779 /* do nothing, keep ASCII-8BIT */
780 break;
782 return res;
785 VALUE
786 rb_ec_pack_ary(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
788 return pack_pack(ec, ary, fmt, buffer);
/* 64-character output alphabet for uuencoding, indexed by 6-bit value. */
static const char uu_table[] = "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";

/* 64-character output alphabet for base64, indexed by 6-bit value. */
static const char b64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
796 static void
797 encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
799 enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
800 char buff[buff_size + 1]; /* +1 for tail_lf */
801 long i = 0;
802 const char *const trans = type == 'u' ? uu_table : b64_table;
803 char padding;
804 const unsigned char *s = (const unsigned char *)s0;
806 if (type == 'u') {
807 buff[i++] = (char)len + ' ';
808 padding = '`';
810 else {
811 padding = '=';
813 while (len >= input_unit) {
814 while (len >= input_unit && buff_size-i >= encoded_unit) {
815 buff[i++] = trans[077 & (*s >> 2)];
816 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
817 buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
818 buff[i++] = trans[077 & s[2]];
819 s += input_unit;
820 len -= input_unit;
822 if (buff_size-i < encoded_unit) {
823 rb_str_buf_cat(str, buff, i);
824 i = 0;
828 if (len == 2) {
829 buff[i++] = trans[077 & (*s >> 2)];
830 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
831 buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
832 buff[i++] = padding;
834 else if (len == 1) {
835 buff[i++] = trans[077 & (*s >> 2)];
836 buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
837 buff[i++] = padding;
838 buff[i++] = padding;
840 if (tail_lf) buff[i++] = '\n';
841 rb_str_buf_cat(str, buff, i);
842 if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
845 static const char hex_table[] = "0123456789ABCDEF";
847 static void
848 qpencode(VALUE str, VALUE from, long len)
850 char buff[1024];
851 long i = 0, n = 0, prev = EOF;
852 unsigned char *s = (unsigned char*)RSTRING_PTR(from);
853 unsigned char *send = s + RSTRING_LEN(from);
855 while (s < send) {
856 if ((*s > 126) ||
857 (*s < 32 && *s != '\n' && *s != '\t') ||
858 (*s == '=')) {
859 buff[i++] = '=';
860 buff[i++] = hex_table[*s >> 4];
861 buff[i++] = hex_table[*s & 0x0f];
862 n += 3;
863 prev = EOF;
865 else if (*s == '\n') {
866 if (prev == ' ' || prev == '\t') {
867 buff[i++] = '=';
868 buff[i++] = *s;
870 buff[i++] = *s;
871 n = 0;
872 prev = *s;
874 else {
875 buff[i++] = *s;
876 n++;
877 prev = *s;
879 if (n > len) {
880 buff[i++] = '=';
881 buff[i++] = '\n';
882 n = 0;
883 prev = '\n';
885 if (i > 1024 - 5) {
886 rb_str_buf_cat(str, buff, i);
887 i = 0;
889 s++;
891 if (n > 0) {
892 buff[i++] = '=';
893 buff[i++] = '\n';
895 if (i > 0) {
896 rb_str_buf_cat(str, buff, i);
900 static inline int
901 hex2num(char c)
903 int n;
904 n = ruby_digit36_to_number_table[(unsigned char)c];
905 if (16 <= n)
906 n = -1;
907 return n;
/*
 * PACK_LENGTH_ADJUST_SIZE(sz): clamp the enclosing function's `len` to
 * the number of whole `sz`-byte items left between `s` and `send`.
 * `tmp_len` receives the number of requested items that were cut off,
 * or 0 when nothing was cut or the count came from '*' (`star`).
 * Relies on the locals len, tmp_len, star, s and send.
 */
#define PACK_LENGTH_ADJUST_SIZE(sz) do {		\
    tmp_len = 0;					\
    if (len > (long)((send-s)/(sz))) {			\
        if (!star) {					\
            tmp_len = len-(send-s)/(sz);		\
        }						\
        len = (send-s)/(sz);				\
    }							\
} while (0)

/*
 * PACK_ITEM_ADJUST(): in UNPACK_ARRAY mode, extend `ary` by `tmp_len`
 * slots by storing nil at the new last index.
 * Relies on the locals tmp_len, mode and ary.
 */
#define PACK_ITEM_ADJUST() do {					\
    if (tmp_len > 0 && mode == UNPACK_ARRAY)			\
        rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil);	\
} while (0)
/*
 * Workaround for Oracle Developer Studio (Oracle Solaris Studio)
 * 12.4/12.5/12.6 C compiler optimization bug
 * with "-xO4" optimization option.
 *
 * AVOID_CC_BUG expands to `volatile` for those compiler versions and to
 * nothing everywhere else.
 */
#if defined(__SUNPRO_C) && __SUNPRO_C >= 0x5130 && __SUNPRO_C <= 0x5150
# define AVOID_CC_BUG volatile
#else
# define AVOID_CC_BUG
#endif
/* How unpacked items are delivered by pack_unpack_internal. */
enum unpack_mode {
    UNPACK_ARRAY = 0,	/* items are pushed onto a result array */
    UNPACK_BLOCK = 1,	/* each item is yielded */
    UNPACK_1 = 2	/* the first item is returned immediately */
};
941 static VALUE
942 pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset)
944 #define hexdigits ruby_hexdigits
945 char *s, *send;
946 char *p, *pend;
947 VALUE ary, associates = Qfalse;
948 char type;
949 long len;
950 AVOID_CC_BUG long tmp_len;
951 int star;
952 #ifdef NATINT_PACK
953 int natint; /* native integer */
954 #endif
955 int signed_p, integer_size, bigendian_p;
956 #define UNPACK_PUSH(item) do {\
957 VALUE item_val = (item);\
958 if ((mode) == UNPACK_BLOCK) {\
959 rb_yield(item_val);\
961 else if ((mode) == UNPACK_ARRAY) {\
962 rb_ary_push(ary, item_val);\
964 else /* if ((mode) == UNPACK_1) { */ {\
965 return item_val; \
967 } while (0)
969 StringValue(str);
970 StringValue(fmt);
971 rb_must_asciicompat(fmt);
973 if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative");
974 len = RSTRING_LEN(str);
975 if (offset > len) rb_raise(rb_eArgError, "offset outside of string");
977 s = RSTRING_PTR(str);
978 send = s + len;
979 s += offset;
981 p = RSTRING_PTR(fmt);
982 pend = p + RSTRING_LEN(fmt);
984 #define UNPACK_FETCH(var, type) (memcpy((var), s, sizeof(type)), s += sizeof(type))
986 ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
987 while (p < pend) {
988 int explicit_endian = 0;
989 type = *p++;
990 #ifdef NATINT_PACK
991 natint = 0;
992 #endif
994 if (ISSPACE(type)) continue;
995 if (type == '#') {
996 while ((p < pend) && (*p != '\n')) {
997 p++;
999 continue;
1002 star = 0;
1004 modifiers:
1005 switch (*p) {
1006 case '_':
1007 case '!':
1009 if (strchr(natstr, type)) {
1010 #ifdef NATINT_PACK
1011 natint = 1;
1012 #endif
1013 p++;
1015 else {
1016 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
1018 goto modifiers;
1020 case '<':
1021 case '>':
1022 if (!strchr(endstr, type)) {
1023 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
1025 if (explicit_endian) {
1026 rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
1028 explicit_endian = *p++;
1029 goto modifiers;
1033 if (p >= pend)
1034 len = 1;
1035 else if (*p == '*') {
1036 star = 1;
1037 len = send - s;
1038 p++;
1040 else if (ISDIGIT(*p)) {
1041 errno = 0;
1042 len = STRTOUL(p, (char**)&p, 10);
1043 if (len < 0 || errno) {
1044 rb_raise(rb_eRangeError, "pack length too big");
1047 else {
1048 len = (type != '@');
1051 switch (type) {
1052 case '%':
1053 rb_raise(rb_eArgError, "%% is not supported");
1054 break;
1056 case 'A':
1057 if (len > send - s) len = send - s;
1059 long end = len;
1060 char *t = s + len - 1;
1062 while (t >= s) {
1063 if (*t != ' ' && *t != '\0') break;
1064 t--; len--;
1066 UNPACK_PUSH(rb_str_new(s, len));
1067 s += end;
1069 break;
1071 case 'Z':
1073 char *t = s;
1075 if (len > send-s) len = send-s;
1076 while (t < s+len && *t) t++;
1077 UNPACK_PUSH(rb_str_new(s, t-s));
1078 if (t < send) t++;
1079 s = star ? t : s+len;
1081 break;
1083 case 'a':
1084 if (len > send - s) len = send - s;
1085 UNPACK_PUSH(rb_str_new(s, len));
1086 s += len;
1087 break;
1089 case 'b':
1091 VALUE bitstr;
1092 char *t;
1093 int bits;
1094 long i;
1096 if (p[-1] == '*' || len > (send - s) * 8)
1097 len = (send - s) * 8;
1098 bits = 0;
1099 bitstr = rb_usascii_str_new(0, len);
1100 t = RSTRING_PTR(bitstr);
1101 for (i=0; i<len; i++) {
1102 if (i & 7) bits >>= 1;
1103 else bits = (unsigned char)*s++;
1104 *t++ = (bits & 1) ? '1' : '0';
1106 UNPACK_PUSH(bitstr);
1108 break;
1110 case 'B':
1112 VALUE bitstr;
1113 char *t;
1114 int bits;
1115 long i;
1117 if (p[-1] == '*' || len > (send - s) * 8)
1118 len = (send - s) * 8;
1119 bits = 0;
1120 bitstr = rb_usascii_str_new(0, len);
1121 t = RSTRING_PTR(bitstr);
1122 for (i=0; i<len; i++) {
1123 if (i & 7) bits <<= 1;
1124 else bits = (unsigned char)*s++;
1125 *t++ = (bits & 128) ? '1' : '0';
1127 UNPACK_PUSH(bitstr);
1129 break;
1131 case 'h':
1133 VALUE bitstr;
1134 char *t;
1135 int bits;
1136 long i;
1138 if (p[-1] == '*' || len > (send - s) * 2)
1139 len = (send - s) * 2;
1140 bits = 0;
1141 bitstr = rb_usascii_str_new(0, len);
1142 t = RSTRING_PTR(bitstr);
1143 for (i=0; i<len; i++) {
1144 if (i & 1)
1145 bits >>= 4;
1146 else
1147 bits = (unsigned char)*s++;
1148 *t++ = hexdigits[bits & 15];
1150 UNPACK_PUSH(bitstr);
1152 break;
1154 case 'H':
1156 VALUE bitstr;
1157 char *t;
1158 int bits;
1159 long i;
1161 if (p[-1] == '*' || len > (send - s) * 2)
1162 len = (send - s) * 2;
1163 bits = 0;
1164 bitstr = rb_usascii_str_new(0, len);
1165 t = RSTRING_PTR(bitstr);
1166 for (i=0; i<len; i++) {
1167 if (i & 1)
1168 bits <<= 4;
1169 else
1170 bits = (unsigned char)*s++;
1171 *t++ = hexdigits[(bits >> 4) & 15];
1173 UNPACK_PUSH(bitstr);
1175 break;
1177 case 'c':
1178 signed_p = 1;
1179 integer_size = 1;
1180 bigendian_p = BIGENDIAN_P(); /* not effective */
1181 goto unpack_integer;
1183 case 'C':
1184 signed_p = 0;
1185 integer_size = 1;
1186 bigendian_p = BIGENDIAN_P(); /* not effective */
1187 goto unpack_integer;
1189 case 's':
1190 signed_p = 1;
1191 integer_size = NATINT_LEN(short, 2);
1192 bigendian_p = BIGENDIAN_P();
1193 goto unpack_integer;
1195 case 'S':
1196 signed_p = 0;
1197 integer_size = NATINT_LEN(short, 2);
1198 bigendian_p = BIGENDIAN_P();
1199 goto unpack_integer;
1201 case 'i':
1202 signed_p = 1;
1203 integer_size = (int)sizeof(int);
1204 bigendian_p = BIGENDIAN_P();
1205 goto unpack_integer;
1207 case 'I':
1208 signed_p = 0;
1209 integer_size = (int)sizeof(int);
1210 bigendian_p = BIGENDIAN_P();
1211 goto unpack_integer;
1213 case 'l':
1214 signed_p = 1;
1215 integer_size = NATINT_LEN(long, 4);
1216 bigendian_p = BIGENDIAN_P();
1217 goto unpack_integer;
1219 case 'L':
1220 signed_p = 0;
1221 integer_size = NATINT_LEN(long, 4);
1222 bigendian_p = BIGENDIAN_P();
1223 goto unpack_integer;
1225 case 'q':
1226 signed_p = 1;
1227 integer_size = NATINT_LEN_Q;
1228 bigendian_p = BIGENDIAN_P();
1229 goto unpack_integer;
1231 case 'Q':
1232 signed_p = 0;
1233 integer_size = NATINT_LEN_Q;
1234 bigendian_p = BIGENDIAN_P();
1235 goto unpack_integer;
1237 case 'j':
1238 signed_p = 1;
1239 integer_size = sizeof(intptr_t);
1240 bigendian_p = BIGENDIAN_P();
1241 goto unpack_integer;
1243 case 'J':
1244 signed_p = 0;
1245 integer_size = sizeof(uintptr_t);
1246 bigendian_p = BIGENDIAN_P();
1247 goto unpack_integer;
1249 case 'n':
1250 signed_p = 0;
1251 integer_size = 2;
1252 bigendian_p = 1;
1253 goto unpack_integer;
1255 case 'N':
1256 signed_p = 0;
1257 integer_size = 4;
1258 bigendian_p = 1;
1259 goto unpack_integer;
1261 case 'v':
1262 signed_p = 0;
1263 integer_size = 2;
1264 bigendian_p = 0;
1265 goto unpack_integer;
1267 case 'V':
1268 signed_p = 0;
1269 integer_size = 4;
1270 bigendian_p = 0;
1271 goto unpack_integer;
1273 unpack_integer:
1274 if (explicit_endian) {
1275 bigendian_p = explicit_endian == '>';
1277 PACK_LENGTH_ADJUST_SIZE(integer_size);
1278 while (len-- > 0) {
1279 int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
1280 VALUE val;
1281 if (signed_p)
1282 flags |= INTEGER_PACK_2COMP;
1283 val = rb_integer_unpack(s, integer_size, 1, 0, flags);
1284 UNPACK_PUSH(val);
1285 s += integer_size;
1287 PACK_ITEM_ADJUST();
1288 break;
1290 case 'f':
1291 case 'F':
1292 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1293 while (len-- > 0) {
1294 float tmp;
1295 UNPACK_FETCH(&tmp, float);
1296 UNPACK_PUSH(DBL2NUM((double)tmp));
1298 PACK_ITEM_ADJUST();
1299 break;
1301 case 'e':
1302 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1303 while (len-- > 0) {
1304 FLOAT_CONVWITH(tmp);
1305 UNPACK_FETCH(tmp.buf, float);
1306 VTOHF(tmp);
1307 UNPACK_PUSH(DBL2NUM(tmp.f));
1309 PACK_ITEM_ADJUST();
1310 break;
1312 case 'E':
1313 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1314 while (len-- > 0) {
1315 DOUBLE_CONVWITH(tmp);
1316 UNPACK_FETCH(tmp.buf, double);
1317 VTOHD(tmp);
1318 UNPACK_PUSH(DBL2NUM(tmp.d));
1320 PACK_ITEM_ADJUST();
1321 break;
1323 case 'D':
1324 case 'd':
1325 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1326 while (len-- > 0) {
1327 double tmp;
1328 UNPACK_FETCH(&tmp, double);
1329 UNPACK_PUSH(DBL2NUM(tmp));
1331 PACK_ITEM_ADJUST();
1332 break;
1334 case 'g':
1335 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1336 while (len-- > 0) {
1337 FLOAT_CONVWITH(tmp);
1338 UNPACK_FETCH(tmp.buf, float);
1339 NTOHF(tmp);
1340 UNPACK_PUSH(DBL2NUM(tmp.f));
1342 PACK_ITEM_ADJUST();
1343 break;
1345 case 'G':
1346 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1347 while (len-- > 0) {
1348 DOUBLE_CONVWITH(tmp);
1349 UNPACK_FETCH(tmp.buf, double);
1350 NTOHD(tmp);
1351 UNPACK_PUSH(DBL2NUM(tmp.d));
1353 PACK_ITEM_ADJUST();
1354 break;
1356 case 'U':
1357 if (len > send - s) len = send - s;
1358 while (len > 0 && s < send) {
1359 long alen = send - s;
1360 unsigned long l;
1362 l = utf8_to_uv(s, &alen);
1363 s += alen; len--;
1364 UNPACK_PUSH(ULONG2NUM(l));
1366 break;
1368 case 'u':
1370 VALUE buf = rb_str_new(0, (send - s)*3/4);
1371 char *ptr = RSTRING_PTR(buf);
1372 long total = 0;
1374 while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
1375 long a,b,c,d;
1376 char hunk[3];
1378 len = ((unsigned char)*s++ - ' ') & 077;
1380 total += len;
1381 if (total > RSTRING_LEN(buf)) {
1382 len -= total - RSTRING_LEN(buf);
1383 total = RSTRING_LEN(buf);
1386 while (len > 0) {
1387 long mlen = len > 3 ? 3 : len;
1389 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1390 a = ((unsigned char)*s++ - ' ') & 077;
1391 else
1392 a = 0;
1393 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1394 b = ((unsigned char)*s++ - ' ') & 077;
1395 else
1396 b = 0;
1397 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1398 c = ((unsigned char)*s++ - ' ') & 077;
1399 else
1400 c = 0;
1401 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1402 d = ((unsigned char)*s++ - ' ') & 077;
1403 else
1404 d = 0;
1405 hunk[0] = (char)(a << 2 | b >> 4);
1406 hunk[1] = (char)(b << 4 | c >> 2);
1407 hunk[2] = (char)(c << 6 | d);
1408 memcpy(ptr, hunk, mlen);
1409 ptr += mlen;
1410 len -= mlen;
1412 if (s < send && (unsigned char)*s != '\r' && *s != '\n')
1413 s++; /* possible checksum byte */
1414 if (s < send && *s == '\r') s++;
1415 if (s < send && *s == '\n') s++;
1418 rb_str_set_len(buf, total);
1419 UNPACK_PUSH(buf);
1421 break;
1423 case 'm':
1425 VALUE buf = rb_str_new(0, (send - s + 3)*3/4); /* +3 is for skipping paddings */
1426 char *ptr = RSTRING_PTR(buf);
1427 int a = -1,b = -1,c = 0,d = 0;
1428 static signed char b64_xtable[256];
1430 if (b64_xtable['/'] <= 0) {
1431 int i;
1433 for (i = 0; i < 256; i++) {
1434 b64_xtable[i] = -1;
1436 for (i = 0; i < 64; i++) {
1437 b64_xtable[(unsigned char)b64_table[i]] = (char)i;
1440 if (len == 0) {
1441 while (s < send) {
1442 a = b = c = d = -1;
1443 a = b64_xtable[(unsigned char)*s++];
1444 if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
1445 b = b64_xtable[(unsigned char)*s++];
1446 if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
1447 if (*s == '=') {
1448 if (s + 2 == send && *(s + 1) == '=') break;
1449 rb_raise(rb_eArgError, "invalid base64");
1451 c = b64_xtable[(unsigned char)*s++];
1452 if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
1453 if (s + 1 == send && *s == '=') break;
1454 d = b64_xtable[(unsigned char)*s++];
1455 if (d == -1) rb_raise(rb_eArgError, "invalid base64");
1456 *ptr++ = castchar(a << 2 | b >> 4);
1457 *ptr++ = castchar(b << 4 | c >> 2);
1458 *ptr++ = castchar(c << 6 | d);
1460 if (c == -1) {
1461 *ptr++ = castchar(a << 2 | b >> 4);
1462 if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
1464 else if (d == -1) {
1465 *ptr++ = castchar(a << 2 | b >> 4);
1466 *ptr++ = castchar(b << 4 | c >> 2);
1467 if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
1470 else {
1471 while (s < send) {
1472 a = b = c = d = -1;
1473 while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1474 if (s >= send) break;
1475 s++;
1476 while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1477 if (s >= send) break;
1478 s++;
1479 while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1480 if (*s == '=' || s >= send) break;
1481 s++;
1482 while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1483 if (*s == '=' || s >= send) break;
1484 s++;
1485 *ptr++ = castchar(a << 2 | b >> 4);
1486 *ptr++ = castchar(b << 4 | c >> 2);
1487 *ptr++ = castchar(c << 6 | d);
1488 a = -1;
1490 if (a != -1 && b != -1) {
1491 if (c == -1)
1492 *ptr++ = castchar(a << 2 | b >> 4);
1493 else {
1494 *ptr++ = castchar(a << 2 | b >> 4);
1495 *ptr++ = castchar(b << 4 | c >> 2);
1499 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1500 UNPACK_PUSH(buf);
1502 break;
1504 case 'M':
1506 VALUE buf = rb_str_new(0, send - s);
1507 char *ptr = RSTRING_PTR(buf), *ss = s;
1508 int csum = 0;
1509 int c1, c2;
1511 while (s < send) {
1512 if (*s == '=') {
1513 if (++s == send) break;
1514 if (s+1 < send && *s == '\r' && *(s+1) == '\n')
1515 s++;
1516 if (*s != '\n') {
1517 if ((c1 = hex2num(*s)) == -1) break;
1518 if (++s == send) break;
1519 if ((c2 = hex2num(*s)) == -1) break;
1520 csum |= *ptr++ = castchar(c1 << 4 | c2);
1523 else {
1524 csum |= *ptr++ = *s;
1526 s++;
1527 ss = s;
1529 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1530 rb_str_buf_cat(buf, ss, send-ss);
1531 csum = ISASCII(csum) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
1532 ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum);
1533 UNPACK_PUSH(buf);
1535 break;
1537 case '@':
1538 if (len > RSTRING_LEN(str))
1539 rb_raise(rb_eArgError, "@ outside of string");
1540 s = RSTRING_PTR(str) + len;
1541 break;
1543 case 'X':
1544 if (len > s - RSTRING_PTR(str))
1545 rb_raise(rb_eArgError, "X outside of string");
1546 s -= len;
1547 break;
1549 case 'x':
1550 if (len > send - s)
1551 rb_raise(rb_eArgError, "x outside of string");
1552 s += len;
1553 break;
1555 case 'P':
1556 if (sizeof(char *) <= (size_t)(send - s)) {
1557 VALUE tmp = Qnil;
1558 char *t;
1560 UNPACK_FETCH(&t, char *);
1561 if (t) {
1562 if (!associates) associates = str_associated(str);
1563 tmp = associated_pointer(associates, t);
1564 if (len < RSTRING_LEN(tmp)) {
1565 tmp = rb_str_new(t, len);
1566 str_associate(tmp, associates);
1569 UNPACK_PUSH(tmp);
1571 break;
1573 case 'p':
1574 if (len > (long)((send - s) / sizeof(char *)))
1575 len = (send - s) / sizeof(char *);
1576 while (len-- > 0) {
1577 if ((size_t)(send - s) < sizeof(char *))
1578 break;
1579 else {
1580 VALUE tmp = Qnil;
1581 char *t;
1583 UNPACK_FETCH(&t, char *);
1584 if (t) {
1585 if (!associates) associates = str_associated(str);
1586 tmp = associated_pointer(associates, t);
1588 UNPACK_PUSH(tmp);
1591 break;
1593 case 'w':
1595 char *s0 = s;
1596 while (len > 0 && s < send) {
1597 if (*s & 0x80) {
1598 s++;
1600 else {
1601 s++;
1602 UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
1603 len--;
1604 s0 = s;
1608 break;
1610 default:
1611 unknown_directive("unpack", type, fmt);
1612 break;
1616 return ary;
1619 static VALUE
1620 pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1622 enum unpack_mode mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
1623 return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset));
1626 static VALUE
1627 pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1629 return pack_unpack_internal(str, fmt, UNPACK_1, RB_NUM2LONG(offset));
1633 rb_uv_to_utf8(char buf[6], unsigned long uv)
1635 if (uv <= 0x7f) {
1636 buf[0] = (char)uv;
1637 return 1;
1639 if (uv <= 0x7ff) {
1640 buf[0] = castchar(((uv>>6)&0xff)|0xc0);
1641 buf[1] = castchar((uv&0x3f)|0x80);
1642 return 2;
1644 if (uv <= 0xffff) {
1645 buf[0] = castchar(((uv>>12)&0xff)|0xe0);
1646 buf[1] = castchar(((uv>>6)&0x3f)|0x80);
1647 buf[2] = castchar((uv&0x3f)|0x80);
1648 return 3;
1650 if (uv <= 0x1fffff) {
1651 buf[0] = castchar(((uv>>18)&0xff)|0xf0);
1652 buf[1] = castchar(((uv>>12)&0x3f)|0x80);
1653 buf[2] = castchar(((uv>>6)&0x3f)|0x80);
1654 buf[3] = castchar((uv&0x3f)|0x80);
1655 return 4;
1657 if (uv <= 0x3ffffff) {
1658 buf[0] = castchar(((uv>>24)&0xff)|0xf8);
1659 buf[1] = castchar(((uv>>18)&0x3f)|0x80);
1660 buf[2] = castchar(((uv>>12)&0x3f)|0x80);
1661 buf[3] = castchar(((uv>>6)&0x3f)|0x80);
1662 buf[4] = castchar((uv&0x3f)|0x80);
1663 return 5;
1665 if (uv <= 0x7fffffff) {
1666 buf[0] = castchar(((uv>>30)&0xff)|0xfc);
1667 buf[1] = castchar(((uv>>24)&0x3f)|0x80);
1668 buf[2] = castchar(((uv>>18)&0x3f)|0x80);
1669 buf[3] = castchar(((uv>>12)&0x3f)|0x80);
1670 buf[4] = castchar(((uv>>6)&0x3f)|0x80);
1671 buf[5] = castchar((uv&0x3f)|0x80);
1672 return 6;
1674 rb_raise(rb_eRangeError, "pack(U): value out of range");
1676 UNREACHABLE_RETURN(Qnil);
/*
 * Lower bounds used to detect redundant (overlong) UTF-8 sequences.
 *
 * utf8_limits[len - 1] is the smallest value that is accepted from a
 * sequence of `len` bytes: utf8_to_uv() raises "redundant UTF-8 sequence"
 * when the decoded value is below the entry for its sequence length.
 * utf8_to_uv() only decodes sequences of up to 6 bytes, so it never indexes
 * the last entry.
 */
static const unsigned long utf8_limits[] = {
    0x0,                        /* 1 byte  */
    0x80,                       /* 2 bytes */
    0x800,                      /* 3 bytes */
    0x10000,                    /* 4 bytes */
    0x200000,                   /* 5 bytes */
    0x4000000,                  /* 6 bytes */
    0x80000000,                 /* 7 bytes */
};
1689 static unsigned long
1690 utf8_to_uv(const char *p, long *lenp)
1692 int c = *p++ & 0xff;
1693 unsigned long uv = c;
1694 long n;
1696 if (!(uv & 0x80)) {
1697 *lenp = 1;
1698 return uv;
1700 if (!(uv & 0x40)) {
1701 *lenp = 1;
1702 rb_raise(rb_eArgError, "malformed UTF-8 character");
1705 if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
1706 else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
1707 else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
1708 else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
1709 else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
1710 else {
1711 *lenp = 1;
1712 rb_raise(rb_eArgError, "malformed UTF-8 character");
1714 if (n > *lenp) {
1715 rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
1716 n, *lenp);
1718 *lenp = n--;
1719 if (n != 0) {
1720 while (n--) {
1721 c = *p++ & 0xff;
1722 if ((c & 0xc0) != 0x80) {
1723 *lenp -= n + 1;
1724 rb_raise(rb_eArgError, "malformed UTF-8 character");
1726 else {
1727 c &= 0x3f;
1728 uv = uv << 6 | c;
1732 n = *lenp - 1;
1733 if (uv < utf8_limits[n]) {
1734 rb_raise(rb_eArgError, "redundant UTF-8 sequence");
1736 return uv;
1739 #include "pack.rbinc"
1741 void
1742 Init_pack(void)
1744 id_associated = rb_make_internal_id();