[ruby/win32ole] Undefine allocator of WIN32OLE_VARIABLE to get rid of warning
[ruby-80x24.org.git] / pack.c
blob1fbbd724d78fb3aa1045827e4eab555f83576ed2
1 /**********************************************************************
3 pack.c -
5 $Author$
6 created at: Thu Feb 10 15:17:05 JST 1994
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
10 **********************************************************************/
12 #include "ruby/internal/config.h"
14 #include <ctype.h>
15 #include <errno.h>
16 #include <float.h>
17 #include <sys/types.h>
19 #include "internal.h"
20 #include "internal/array.h"
21 #include "internal/bits.h"
22 #include "internal/string.h"
23 #include "internal/symbol.h"
24 #include "internal/variable.h"
25 #include "ruby/util.h"
27 #include "builtin.h"
/*
 * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
 * instead of HAVE_LONG_LONG or LONG_LONG.
 * This means q! and Q! always mean the standard long long type, and
 * cause ArgumentError on platforms which have no long long type,
 * even if the platform has an implementation-specific 64-bit type.
 * This behavior is consistent with the documentation of pack/unpack.
 */
37 #ifdef HAVE_TRUE_LONG_LONG
38 static const char natstr[] = "sSiIlLqQjJ";
39 #else
40 static const char natstr[] = "sSiIlLjJ";
41 #endif
42 static const char endstr[] = "sSiIlLqQjJ";
44 #ifdef HAVE_TRUE_LONG_LONG
45 /* It is intentional to use long long instead of LONG_LONG. */
46 # define NATINT_LEN_Q NATINT_LEN(long long, 8)
47 #else
48 # define NATINT_LEN_Q 8
49 #endif
51 #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
52 # define NATINT_PACK
53 #endif
55 #ifdef DYNAMIC_ENDIAN
56 /* for universal binary of NEXTSTEP and MacOS X */
57 /* useless since autoconf 2.63? */
/*
 * Detect the byte order of the running process at run time.
 *
 * Returns 1 when the lowest-addressed byte of an int holding 1 is zero
 * (big endian) and 0 otherwise.  The answer is computed on the first call
 * and cached in function-local statics for every later call.
 */
static int
is_bigendian(void)
{
    static int probed = 0;
    static int cached_answer;

    if (!probed) {
        const char *lowest_byte;

        /* The flag doubles as the probe value: once set to 1, its first
         * byte is non-zero only on a little-endian layout. */
        probed = 1;
        lowest_byte = (const char *)&probed;
        cached_answer = lowest_byte[0] ? 0 : 1;
    }
    return cached_answer;
}
70 # define BIGENDIAN_P() (is_bigendian())
71 #elif defined(WORDS_BIGENDIAN)
72 # define BIGENDIAN_P() 1
73 #else
74 # define BIGENDIAN_P() 0
75 #endif
77 #ifdef NATINT_PACK
78 # define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
79 #else
80 # define NATINT_LEN(type,len) ((int)sizeof(type))
81 #endif
83 typedef union {
84 float f;
85 uint32_t u;
86 char buf[4];
87 } FLOAT_SWAPPER;
88 typedef union {
89 double d;
90 uint64_t u;
91 char buf[8];
92 } DOUBLE_SWAPPER;
93 #define swapf(x) swap32(x)
94 #define swapd(x) swap64(x)
96 #define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
97 #define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
98 #define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
99 #define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
100 #define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
101 #define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
102 #define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
103 #define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
105 #define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
106 #define HTONF(x) ((x).u = rb_htonf((x).u))
107 #define HTOVF(x) ((x).u = rb_htovf((x).u))
108 #define NTOHF(x) ((x).u = rb_ntohf((x).u))
109 #define VTOHF(x) ((x).u = rb_vtohf((x).u))
111 #define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
112 #define HTOND(x) ((x).u = rb_htond((x).u))
113 #define HTOVD(x) ((x).u = rb_htovd((x).u))
114 #define NTOHD(x) ((x).u = rb_ntohd((x).u))
115 #define VTOHD(x) ((x).u = rb_vtohd((x).u))
117 #define MAX_INTEGER_PACK_SIZE 8
119 static const char toofew[] = "too few arguments";
121 static void encodes(VALUE,const char*,long,int,int);
122 static void qpencode(VALUE,VALUE,long);
124 static unsigned long utf8_to_uv(const char*,long*);
126 static ID id_associated;
128 static void
129 str_associate(VALUE str, VALUE add)
131 /* assert(NIL_P(rb_attr_get(str, id_associated))); */
132 rb_ivar_set(str, id_associated, add);
135 static VALUE
136 str_associated(VALUE str)
138 VALUE associates = rb_ivar_lookup(str, id_associated, Qfalse);
139 if (!associates)
140 rb_raise(rb_eArgError, "no associated pointer");
141 return associates;
144 static VALUE
145 associated_pointer(VALUE associates, const char *t)
147 const VALUE *p = RARRAY_CONST_PTR(associates);
148 const VALUE *pend = p + RARRAY_LEN(associates);
149 for (; p < pend; p++) {
150 VALUE tmp = *p;
151 if (RB_TYPE_P(tmp, T_STRING) && RSTRING_PTR(tmp) == t) return tmp;
153 rb_raise(rb_eArgError, "non associated pointer");
154 UNREACHABLE_RETURN(Qnil);
157 static void
158 unknown_directive(const char *mode, char type, VALUE fmt)
160 char unknown[5];
162 if (ISPRINT(type)) {
163 unknown[0] = type;
164 unknown[1] = '\0';
166 else {
167 snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
169 fmt = rb_str_quote_unprintable(fmt);
170 rb_warning("unknown %s directive '%s' in '%"PRIsVALUE"'",
171 mode, unknown, fmt);
174 static float
175 VALUE_to_float(VALUE obj)
177 VALUE v = rb_to_float(obj);
178 double d = RFLOAT_VALUE(v);
180 if (isnan(d)) {
181 return NAN;
183 else if (d < -FLT_MAX) {
184 return -INFINITY;
186 else if (d <= FLT_MAX) {
187 return d;
189 else {
190 return INFINITY;
194 static VALUE
195 pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
197 static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
198 static const char spc10[] = " ";
199 const char *p, *pend;
200 VALUE res, from, associates = 0;
201 char type;
202 long len, idx, plen;
203 const char *ptr;
204 int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
205 #ifdef NATINT_PACK
206 int natint; /* native integer */
207 #endif
208 int integer_size, bigendian_p;
210 StringValue(fmt);
211 p = RSTRING_PTR(fmt);
212 pend = p + RSTRING_LEN(fmt);
214 if (NIL_P(buffer)) {
215 res = rb_str_buf_new(0);
217 else {
218 if (!RB_TYPE_P(buffer, T_STRING))
219 rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer));
220 res = buffer;
223 idx = 0;
225 #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
226 #define MORE_ITEM (idx < RARRAY_LEN(ary))
227 #define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
228 #define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
230 while (p < pend) {
231 int explicit_endian = 0;
232 if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
233 rb_raise(rb_eRuntimeError, "format string modified");
235 type = *p++; /* get data type */
236 #ifdef NATINT_PACK
237 natint = 0;
238 #endif
240 if (ISSPACE(type)) continue;
241 if (type == '#') {
242 while ((p < pend) && (*p != '\n')) {
243 p++;
245 continue;
249 modifiers:
250 switch (*p) {
251 case '_':
252 case '!':
253 if (strchr(natstr, type)) {
254 #ifdef NATINT_PACK
255 natint = 1;
256 #endif
257 p++;
259 else {
260 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
262 goto modifiers;
264 case '<':
265 case '>':
266 if (!strchr(endstr, type)) {
267 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
269 if (explicit_endian) {
270 rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
272 explicit_endian = *p++;
273 goto modifiers;
277 if (*p == '*') { /* set data length */
278 len = strchr("@Xxu", type) ? 0
279 : strchr("PMm", type) ? 1
280 : RARRAY_LEN(ary) - idx;
281 p++;
283 else if (ISDIGIT(*p)) {
284 errno = 0;
285 len = STRTOUL(p, (char**)&p, 10);
286 if (errno) {
287 rb_raise(rb_eRangeError, "pack length too big");
290 else {
291 len = 1;
294 switch (type) {
295 case 'U':
296 /* if encoding is US-ASCII, upgrade to UTF-8 */
297 if (enc_info == 1) enc_info = 2;
298 break;
299 case 'm': case 'M': case 'u':
300 /* keep US-ASCII (do nothing) */
301 break;
302 default:
303 /* fall back to BINARY */
304 enc_info = 0;
305 break;
307 switch (type) {
308 case 'A': case 'a': case 'Z':
309 case 'B': case 'b':
310 case 'H': case 'h':
311 from = NEXTFROM;
312 if (NIL_P(from)) {
313 ptr = "";
314 plen = 0;
316 else {
317 StringValue(from);
318 ptr = RSTRING_PTR(from);
319 plen = RSTRING_LEN(from);
322 if (p[-1] == '*')
323 len = plen;
325 switch (type) {
326 case 'a': /* arbitrary binary string (null padded) */
327 case 'A': /* arbitrary binary string (ASCII space padded) */
328 case 'Z': /* null terminated string */
329 if (plen >= len) {
330 rb_str_buf_cat(res, ptr, len);
331 if (p[-1] == '*' && type == 'Z')
332 rb_str_buf_cat(res, nul10, 1);
334 else {
335 rb_str_buf_cat(res, ptr, plen);
336 len -= plen;
337 while (len >= 10) {
338 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
339 len -= 10;
341 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
343 break;
345 #define castchar(from) (char)((from) & 0xff)
347 case 'b': /* bit string (ascending) */
349 int byte = 0;
350 long i, j = 0;
352 if (len > plen) {
353 j = (len - plen + 1)/2;
354 len = plen;
356 for (i=0; i++ < len; ptr++) {
357 if (*ptr & 1)
358 byte |= 128;
359 if (i & 7)
360 byte >>= 1;
361 else {
362 char c = castchar(byte);
363 rb_str_buf_cat(res, &c, 1);
364 byte = 0;
367 if (len & 7) {
368 char c;
369 byte >>= 7 - (len & 7);
370 c = castchar(byte);
371 rb_str_buf_cat(res, &c, 1);
373 len = j;
374 goto grow;
376 break;
378 case 'B': /* bit string (descending) */
380 int byte = 0;
381 long i, j = 0;
383 if (len > plen) {
384 j = (len - plen + 1)/2;
385 len = plen;
387 for (i=0; i++ < len; ptr++) {
388 byte |= *ptr & 1;
389 if (i & 7)
390 byte <<= 1;
391 else {
392 char c = castchar(byte);
393 rb_str_buf_cat(res, &c, 1);
394 byte = 0;
397 if (len & 7) {
398 char c;
399 byte <<= 7 - (len & 7);
400 c = castchar(byte);
401 rb_str_buf_cat(res, &c, 1);
403 len = j;
404 goto grow;
406 break;
408 case 'h': /* hex string (low nibble first) */
410 int byte = 0;
411 long i, j = 0;
413 if (len > plen) {
414 j = (len + 1) / 2 - (plen + 1) / 2;
415 len = plen;
417 for (i=0; i++ < len; ptr++) {
418 if (ISALPHA(*ptr))
419 byte |= (((*ptr & 15) + 9) & 15) << 4;
420 else
421 byte |= (*ptr & 15) << 4;
422 if (i & 1)
423 byte >>= 4;
424 else {
425 char c = castchar(byte);
426 rb_str_buf_cat(res, &c, 1);
427 byte = 0;
430 if (len & 1) {
431 char c = castchar(byte);
432 rb_str_buf_cat(res, &c, 1);
434 len = j;
435 goto grow;
437 break;
439 case 'H': /* hex string (high nibble first) */
441 int byte = 0;
442 long i, j = 0;
444 if (len > plen) {
445 j = (len + 1) / 2 - (plen + 1) / 2;
446 len = plen;
448 for (i=0; i++ < len; ptr++) {
449 if (ISALPHA(*ptr))
450 byte |= ((*ptr & 15) + 9) & 15;
451 else
452 byte |= *ptr & 15;
453 if (i & 1)
454 byte <<= 4;
455 else {
456 char c = castchar(byte);
457 rb_str_buf_cat(res, &c, 1);
458 byte = 0;
461 if (len & 1) {
462 char c = castchar(byte);
463 rb_str_buf_cat(res, &c, 1);
465 len = j;
466 goto grow;
468 break;
470 break;
472 case 'c': /* signed char */
473 case 'C': /* unsigned char */
474 integer_size = 1;
475 bigendian_p = BIGENDIAN_P(); /* not effective */
476 goto pack_integer;
478 case 's': /* s for int16_t, s! for signed short */
479 integer_size = NATINT_LEN(short, 2);
480 bigendian_p = BIGENDIAN_P();
481 goto pack_integer;
483 case 'S': /* S for uint16_t, S! for unsigned short */
484 integer_size = NATINT_LEN(short, 2);
485 bigendian_p = BIGENDIAN_P();
486 goto pack_integer;
488 case 'i': /* i and i! for signed int */
489 integer_size = (int)sizeof(int);
490 bigendian_p = BIGENDIAN_P();
491 goto pack_integer;
493 case 'I': /* I and I! for unsigned int */
494 integer_size = (int)sizeof(int);
495 bigendian_p = BIGENDIAN_P();
496 goto pack_integer;
498 case 'l': /* l for int32_t, l! for signed long */
499 integer_size = NATINT_LEN(long, 4);
500 bigendian_p = BIGENDIAN_P();
501 goto pack_integer;
503 case 'L': /* L for uint32_t, L! for unsigned long */
504 integer_size = NATINT_LEN(long, 4);
505 bigendian_p = BIGENDIAN_P();
506 goto pack_integer;
508 case 'q': /* q for int64_t, q! for signed long long */
509 integer_size = NATINT_LEN_Q;
510 bigendian_p = BIGENDIAN_P();
511 goto pack_integer;
513 case 'Q': /* Q for uint64_t, Q! for unsigned long long */
514 integer_size = NATINT_LEN_Q;
515 bigendian_p = BIGENDIAN_P();
516 goto pack_integer;
518 case 'j': /* j for intptr_t */
519 integer_size = sizeof(intptr_t);
520 bigendian_p = BIGENDIAN_P();
521 goto pack_integer;
523 case 'J': /* J for uintptr_t */
524 integer_size = sizeof(uintptr_t);
525 bigendian_p = BIGENDIAN_P();
526 goto pack_integer;
528 case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
529 integer_size = 2;
530 bigendian_p = 1;
531 goto pack_integer;
533 case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
534 integer_size = 4;
535 bigendian_p = 1;
536 goto pack_integer;
538 case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
539 integer_size = 2;
540 bigendian_p = 0;
541 goto pack_integer;
543 case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
544 integer_size = 4;
545 bigendian_p = 0;
546 goto pack_integer;
548 pack_integer:
549 if (explicit_endian) {
550 bigendian_p = explicit_endian == '>';
552 if (integer_size > MAX_INTEGER_PACK_SIZE)
553 rb_bug("unexpected intger size for pack: %d", integer_size);
554 while (len-- > 0) {
555 char intbuf[MAX_INTEGER_PACK_SIZE];
557 from = NEXTFROM;
558 rb_integer_pack(from, intbuf, integer_size, 1, 0,
559 INTEGER_PACK_2COMP |
560 (bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN));
561 rb_str_buf_cat(res, intbuf, integer_size);
563 break;
565 case 'f': /* single precision float in native format */
566 case 'F': /* ditto */
567 while (len-- > 0) {
568 float f;
570 from = NEXTFROM;
571 f = VALUE_to_float(from);
572 rb_str_buf_cat(res, (char*)&f, sizeof(float));
574 break;
576 case 'e': /* single precision float in VAX byte-order */
577 while (len-- > 0) {
578 FLOAT_CONVWITH(tmp);
580 from = NEXTFROM;
581 tmp.f = VALUE_to_float(from);
582 HTOVF(tmp);
583 rb_str_buf_cat(res, tmp.buf, sizeof(float));
585 break;
587 case 'E': /* double precision float in VAX byte-order */
588 while (len-- > 0) {
589 DOUBLE_CONVWITH(tmp);
590 from = NEXTFROM;
591 tmp.d = RFLOAT_VALUE(rb_to_float(from));
592 HTOVD(tmp);
593 rb_str_buf_cat(res, tmp.buf, sizeof(double));
595 break;
597 case 'd': /* double precision float in native format */
598 case 'D': /* ditto */
599 while (len-- > 0) {
600 double d;
602 from = NEXTFROM;
603 d = RFLOAT_VALUE(rb_to_float(from));
604 rb_str_buf_cat(res, (char*)&d, sizeof(double));
606 break;
608 case 'g': /* single precision float in network byte-order */
609 while (len-- > 0) {
610 FLOAT_CONVWITH(tmp);
611 from = NEXTFROM;
612 tmp.f = VALUE_to_float(from);
613 HTONF(tmp);
614 rb_str_buf_cat(res, tmp.buf, sizeof(float));
616 break;
618 case 'G': /* double precision float in network byte-order */
619 while (len-- > 0) {
620 DOUBLE_CONVWITH(tmp);
622 from = NEXTFROM;
623 tmp.d = RFLOAT_VALUE(rb_to_float(from));
624 HTOND(tmp);
625 rb_str_buf_cat(res, tmp.buf, sizeof(double));
627 break;
629 case 'x': /* null byte */
630 grow:
631 while (len >= 10) {
632 rb_str_buf_cat(res, nul10, 10);
633 len -= 10;
635 rb_str_buf_cat(res, nul10, len);
636 break;
638 case 'X': /* back up byte */
639 shrink:
640 plen = RSTRING_LEN(res);
641 if (plen < len)
642 rb_raise(rb_eArgError, "X outside of string");
643 rb_str_set_len(res, plen - len);
644 break;
646 case '@': /* null fill to absolute position */
647 len -= RSTRING_LEN(res);
648 if (len > 0) goto grow;
649 len = -len;
650 if (len > 0) goto shrink;
651 break;
653 case '%':
654 rb_raise(rb_eArgError, "%% is not supported");
655 break;
657 case 'U': /* Unicode character */
658 while (len-- > 0) {
659 SIGNED_VALUE l;
660 char buf[8];
661 int le;
663 from = NEXTFROM;
664 from = rb_to_int(from);
665 l = NUM2LONG(from);
666 if (l < 0) {
667 rb_raise(rb_eRangeError, "pack(U): value out of range");
669 le = rb_uv_to_utf8(buf, l);
670 rb_str_buf_cat(res, (char*)buf, le);
672 break;
674 case 'u': /* uuencoded string */
675 case 'm': /* base64 encoded string */
676 from = NEXTFROM;
677 StringValue(from);
678 ptr = RSTRING_PTR(from);
679 plen = RSTRING_LEN(from);
681 if (len == 0 && type == 'm') {
682 encodes(res, ptr, plen, type, 0);
683 ptr += plen;
684 break;
686 if (len <= 2)
687 len = 45;
688 else if (len > 63 && type == 'u')
689 len = 63;
690 else
691 len = len / 3 * 3;
692 while (plen > 0) {
693 long todo;
695 if (plen > len)
696 todo = len;
697 else
698 todo = plen;
699 encodes(res, ptr, todo, type, 1);
700 plen -= todo;
701 ptr += todo;
703 break;
705 case 'M': /* quoted-printable encoded string */
706 from = rb_obj_as_string(NEXTFROM);
707 if (len <= 1)
708 len = 72;
709 qpencode(res, from, len);
710 break;
712 case 'P': /* pointer to packed byte string */
713 from = THISFROM;
714 if (!NIL_P(from)) {
715 StringValue(from);
716 if (RSTRING_LEN(from) < len) {
717 rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
718 RSTRING_LEN(from), len);
721 len = 1;
722 /* FALL THROUGH */
723 case 'p': /* pointer to string */
724 while (len-- > 0) {
725 char *t;
726 from = NEXTFROM;
727 if (NIL_P(from)) {
728 t = 0;
730 else {
731 t = StringValuePtr(from);
733 if (!associates) {
734 associates = rb_ary_new();
736 rb_ary_push(associates, from);
737 rb_str_buf_cat(res, (char*)&t, sizeof(char*));
739 break;
741 case 'w': /* BER compressed integer */
742 while (len-- > 0) {
743 VALUE buf = rb_str_new(0, 0);
744 size_t numbytes;
745 int sign;
746 char *cp;
748 from = NEXTFROM;
749 from = rb_to_int(from);
750 numbytes = rb_absint_numwords(from, 7, NULL);
751 if (numbytes == 0)
752 numbytes = 1;
753 buf = rb_str_new(NULL, numbytes);
755 sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN);
757 if (sign < 0)
758 rb_raise(rb_eArgError, "can't compress negative numbers");
759 if (sign == 2)
760 rb_bug("buffer size problem?");
762 cp = RSTRING_PTR(buf);
763 while (1 < numbytes) {
764 *cp |= 0x80;
765 cp++;
766 numbytes--;
769 rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
771 break;
773 default: {
774 unknown_directive("pack", type, fmt);
775 break;
780 if (associates) {
781 str_associate(res, associates);
783 switch (enc_info) {
784 case 1:
785 ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
786 break;
787 case 2:
788 rb_enc_set_index(res, rb_utf8_encindex());
789 break;
790 default:
791 /* do nothing, keep ASCII-8BIT */
792 break;
794 return res;
797 static const char uu_table[] =
798 "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
799 static const char b64_table[] =
800 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
802 static void
803 encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
805 enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
806 char buff[buff_size + 1]; /* +1 for tail_lf */
807 long i = 0;
808 const char *const trans = type == 'u' ? uu_table : b64_table;
809 char padding;
810 const unsigned char *s = (const unsigned char *)s0;
812 if (type == 'u') {
813 buff[i++] = (char)len + ' ';
814 padding = '`';
816 else {
817 padding = '=';
819 while (len >= input_unit) {
820 while (len >= input_unit && buff_size-i >= encoded_unit) {
821 buff[i++] = trans[077 & (*s >> 2)];
822 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
823 buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
824 buff[i++] = trans[077 & s[2]];
825 s += input_unit;
826 len -= input_unit;
828 if (buff_size-i < encoded_unit) {
829 rb_str_buf_cat(str, buff, i);
830 i = 0;
834 if (len == 2) {
835 buff[i++] = trans[077 & (*s >> 2)];
836 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
837 buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
838 buff[i++] = padding;
840 else if (len == 1) {
841 buff[i++] = trans[077 & (*s >> 2)];
842 buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
843 buff[i++] = padding;
844 buff[i++] = padding;
846 if (tail_lf) buff[i++] = '\n';
847 rb_str_buf_cat(str, buff, i);
848 if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
851 static const char hex_table[] = "0123456789ABCDEF";
853 static void
854 qpencode(VALUE str, VALUE from, long len)
856 char buff[1024];
857 long i = 0, n = 0, prev = EOF;
858 unsigned char *s = (unsigned char*)RSTRING_PTR(from);
859 unsigned char *send = s + RSTRING_LEN(from);
861 while (s < send) {
862 if ((*s > 126) ||
863 (*s < 32 && *s != '\n' && *s != '\t') ||
864 (*s == '=')) {
865 buff[i++] = '=';
866 buff[i++] = hex_table[*s >> 4];
867 buff[i++] = hex_table[*s & 0x0f];
868 n += 3;
869 prev = EOF;
871 else if (*s == '\n') {
872 if (prev == ' ' || prev == '\t') {
873 buff[i++] = '=';
874 buff[i++] = *s;
876 buff[i++] = *s;
877 n = 0;
878 prev = *s;
880 else {
881 buff[i++] = *s;
882 n++;
883 prev = *s;
885 if (n > len) {
886 buff[i++] = '=';
887 buff[i++] = '\n';
888 n = 0;
889 prev = '\n';
891 if (i > 1024 - 5) {
892 rb_str_buf_cat(str, buff, i);
893 i = 0;
895 s++;
897 if (n > 0) {
898 buff[i++] = '=';
899 buff[i++] = '\n';
901 if (i > 0) {
902 rb_str_buf_cat(str, buff, i);
906 static inline int
907 hex2num(char c)
909 int n;
910 n = ruby_digit36_to_number_table[(unsigned char)c];
911 if (16 <= n)
912 n = -1;
913 return n;
916 #define PACK_LENGTH_ADJUST_SIZE(sz) do { \
917 tmp_len = 0; \
918 if (len > (long)((send-s)/(sz))) { \
919 if (!star) { \
920 tmp_len = len-(send-s)/(sz); \
922 len = (send-s)/(sz); \
924 } while (0)
926 #define PACK_ITEM_ADJUST() do { \
927 if (tmp_len > 0 && mode == UNPACK_ARRAY) \
928 rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
929 } while (0)
/* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
 * 12.4/12.5/12.6 C compiler optimization bug
 * with "-xO4" optimization option.
 */
935 #if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150
936 # define AVOID_CC_BUG volatile
937 #else
938 # define AVOID_CC_BUG
939 #endif
941 /* unpack mode */
942 #define UNPACK_ARRAY 0
943 #define UNPACK_BLOCK 1
944 #define UNPACK_1 2
946 static VALUE
947 pack_unpack_internal(VALUE str, VALUE fmt, int mode, long offset)
949 #define hexdigits ruby_hexdigits
950 char *s, *send;
951 char *p, *pend;
952 VALUE ary, associates = Qfalse;
953 char type;
954 long len;
955 AVOID_CC_BUG long tmp_len;
956 int star;
957 #ifdef NATINT_PACK
958 int natint; /* native integer */
959 #endif
960 int signed_p, integer_size, bigendian_p;
961 #define UNPACK_PUSH(item) do {\
962 VALUE item_val = (item);\
963 if ((mode) == UNPACK_BLOCK) {\
964 rb_yield(item_val);\
966 else if ((mode) == UNPACK_ARRAY) {\
967 rb_ary_push(ary, item_val);\
969 else /* if ((mode) == UNPACK_1) { */ {\
970 return item_val; \
972 } while (0)
974 StringValue(str);
975 StringValue(fmt);
977 if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative");
978 len = RSTRING_LEN(str);
979 if (offset > len) rb_raise(rb_eArgError, "offset outside of string");
981 s = RSTRING_PTR(str);
982 send = s + len;
983 s += offset;
985 p = RSTRING_PTR(fmt);
986 pend = p + RSTRING_LEN(fmt);
988 #define UNPACK_FETCH(var, type) (memcpy((var), s, sizeof(type)), s += sizeof(type))
990 ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
991 while (p < pend) {
992 int explicit_endian = 0;
993 type = *p++;
994 #ifdef NATINT_PACK
995 natint = 0;
996 #endif
998 if (ISSPACE(type)) continue;
999 if (type == '#') {
1000 while ((p < pend) && (*p != '\n')) {
1001 p++;
1003 continue;
1006 star = 0;
1008 modifiers:
1009 switch (*p) {
1010 case '_':
1011 case '!':
1013 if (strchr(natstr, type)) {
1014 #ifdef NATINT_PACK
1015 natint = 1;
1016 #endif
1017 p++;
1019 else {
1020 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
1022 goto modifiers;
1024 case '<':
1025 case '>':
1026 if (!strchr(endstr, type)) {
1027 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
1029 if (explicit_endian) {
1030 rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
1032 explicit_endian = *p++;
1033 goto modifiers;
1037 if (p >= pend)
1038 len = 1;
1039 else if (*p == '*') {
1040 star = 1;
1041 len = send - s;
1042 p++;
1044 else if (ISDIGIT(*p)) {
1045 errno = 0;
1046 len = STRTOUL(p, (char**)&p, 10);
1047 if (len < 0 || errno) {
1048 rb_raise(rb_eRangeError, "pack length too big");
1051 else {
1052 len = (type != '@');
1055 switch (type) {
1056 case '%':
1057 rb_raise(rb_eArgError, "%% is not supported");
1058 break;
1060 case 'A':
1061 if (len > send - s) len = send - s;
1063 long end = len;
1064 char *t = s + len - 1;
1066 while (t >= s) {
1067 if (*t != ' ' && *t != '\0') break;
1068 t--; len--;
1070 UNPACK_PUSH(rb_str_new(s, len));
1071 s += end;
1073 break;
1075 case 'Z':
1077 char *t = s;
1079 if (len > send-s) len = send-s;
1080 while (t < s+len && *t) t++;
1081 UNPACK_PUSH(rb_str_new(s, t-s));
1082 if (t < send) t++;
1083 s = star ? t : s+len;
1085 break;
1087 case 'a':
1088 if (len > send - s) len = send - s;
1089 UNPACK_PUSH(rb_str_new(s, len));
1090 s += len;
1091 break;
1093 case 'b':
1095 VALUE bitstr;
1096 char *t;
1097 int bits;
1098 long i;
1100 if (p[-1] == '*' || len > (send - s) * 8)
1101 len = (send - s) * 8;
1102 bits = 0;
1103 bitstr = rb_usascii_str_new(0, len);
1104 t = RSTRING_PTR(bitstr);
1105 for (i=0; i<len; i++) {
1106 if (i & 7) bits >>= 1;
1107 else bits = (unsigned char)*s++;
1108 *t++ = (bits & 1) ? '1' : '0';
1110 UNPACK_PUSH(bitstr);
1112 break;
1114 case 'B':
1116 VALUE bitstr;
1117 char *t;
1118 int bits;
1119 long i;
1121 if (p[-1] == '*' || len > (send - s) * 8)
1122 len = (send - s) * 8;
1123 bits = 0;
1124 bitstr = rb_usascii_str_new(0, len);
1125 t = RSTRING_PTR(bitstr);
1126 for (i=0; i<len; i++) {
1127 if (i & 7) bits <<= 1;
1128 else bits = (unsigned char)*s++;
1129 *t++ = (bits & 128) ? '1' : '0';
1131 UNPACK_PUSH(bitstr);
1133 break;
1135 case 'h':
1137 VALUE bitstr;
1138 char *t;
1139 int bits;
1140 long i;
1142 if (p[-1] == '*' || len > (send - s) * 2)
1143 len = (send - s) * 2;
1144 bits = 0;
1145 bitstr = rb_usascii_str_new(0, len);
1146 t = RSTRING_PTR(bitstr);
1147 for (i=0; i<len; i++) {
1148 if (i & 1)
1149 bits >>= 4;
1150 else
1151 bits = (unsigned char)*s++;
1152 *t++ = hexdigits[bits & 15];
1154 UNPACK_PUSH(bitstr);
1156 break;
1158 case 'H':
1160 VALUE bitstr;
1161 char *t;
1162 int bits;
1163 long i;
1165 if (p[-1] == '*' || len > (send - s) * 2)
1166 len = (send - s) * 2;
1167 bits = 0;
1168 bitstr = rb_usascii_str_new(0, len);
1169 t = RSTRING_PTR(bitstr);
1170 for (i=0; i<len; i++) {
1171 if (i & 1)
1172 bits <<= 4;
1173 else
1174 bits = (unsigned char)*s++;
1175 *t++ = hexdigits[(bits >> 4) & 15];
1177 UNPACK_PUSH(bitstr);
1179 break;
1181 case 'c':
1182 signed_p = 1;
1183 integer_size = 1;
1184 bigendian_p = BIGENDIAN_P(); /* not effective */
1185 goto unpack_integer;
1187 case 'C':
1188 signed_p = 0;
1189 integer_size = 1;
1190 bigendian_p = BIGENDIAN_P(); /* not effective */
1191 goto unpack_integer;
1193 case 's':
1194 signed_p = 1;
1195 integer_size = NATINT_LEN(short, 2);
1196 bigendian_p = BIGENDIAN_P();
1197 goto unpack_integer;
1199 case 'S':
1200 signed_p = 0;
1201 integer_size = NATINT_LEN(short, 2);
1202 bigendian_p = BIGENDIAN_P();
1203 goto unpack_integer;
1205 case 'i':
1206 signed_p = 1;
1207 integer_size = (int)sizeof(int);
1208 bigendian_p = BIGENDIAN_P();
1209 goto unpack_integer;
1211 case 'I':
1212 signed_p = 0;
1213 integer_size = (int)sizeof(int);
1214 bigendian_p = BIGENDIAN_P();
1215 goto unpack_integer;
1217 case 'l':
1218 signed_p = 1;
1219 integer_size = NATINT_LEN(long, 4);
1220 bigendian_p = BIGENDIAN_P();
1221 goto unpack_integer;
1223 case 'L':
1224 signed_p = 0;
1225 integer_size = NATINT_LEN(long, 4);
1226 bigendian_p = BIGENDIAN_P();
1227 goto unpack_integer;
1229 case 'q':
1230 signed_p = 1;
1231 integer_size = NATINT_LEN_Q;
1232 bigendian_p = BIGENDIAN_P();
1233 goto unpack_integer;
1235 case 'Q':
1236 signed_p = 0;
1237 integer_size = NATINT_LEN_Q;
1238 bigendian_p = BIGENDIAN_P();
1239 goto unpack_integer;
1241 case 'j':
1242 signed_p = 1;
1243 integer_size = sizeof(intptr_t);
1244 bigendian_p = BIGENDIAN_P();
1245 goto unpack_integer;
1247 case 'J':
1248 signed_p = 0;
1249 integer_size = sizeof(uintptr_t);
1250 bigendian_p = BIGENDIAN_P();
1251 goto unpack_integer;
1253 case 'n':
1254 signed_p = 0;
1255 integer_size = 2;
1256 bigendian_p = 1;
1257 goto unpack_integer;
1259 case 'N':
1260 signed_p = 0;
1261 integer_size = 4;
1262 bigendian_p = 1;
1263 goto unpack_integer;
1265 case 'v':
1266 signed_p = 0;
1267 integer_size = 2;
1268 bigendian_p = 0;
1269 goto unpack_integer;
1271 case 'V':
1272 signed_p = 0;
1273 integer_size = 4;
1274 bigendian_p = 0;
1275 goto unpack_integer;
1277 unpack_integer:
1278 if (explicit_endian) {
1279 bigendian_p = explicit_endian == '>';
1281 PACK_LENGTH_ADJUST_SIZE(integer_size);
1282 while (len-- > 0) {
1283 int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
1284 VALUE val;
1285 if (signed_p)
1286 flags |= INTEGER_PACK_2COMP;
1287 val = rb_integer_unpack(s, integer_size, 1, 0, flags);
1288 UNPACK_PUSH(val);
1289 s += integer_size;
1291 PACK_ITEM_ADJUST();
1292 break;
1294 case 'f':
1295 case 'F':
1296 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1297 while (len-- > 0) {
1298 float tmp;
1299 UNPACK_FETCH(&tmp, float);
1300 UNPACK_PUSH(DBL2NUM((double)tmp));
1302 PACK_ITEM_ADJUST();
1303 break;
1305 case 'e':
1306 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1307 while (len-- > 0) {
1308 FLOAT_CONVWITH(tmp);
1309 UNPACK_FETCH(tmp.buf, float);
1310 VTOHF(tmp);
1311 UNPACK_PUSH(DBL2NUM(tmp.f));
1313 PACK_ITEM_ADJUST();
1314 break;
1316 case 'E':
1317 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1318 while (len-- > 0) {
1319 DOUBLE_CONVWITH(tmp);
1320 UNPACK_FETCH(tmp.buf, double);
1321 VTOHD(tmp);
1322 UNPACK_PUSH(DBL2NUM(tmp.d));
1324 PACK_ITEM_ADJUST();
1325 break;
1327 case 'D':
1328 case 'd':
1329 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1330 while (len-- > 0) {
1331 double tmp;
1332 UNPACK_FETCH(&tmp, double);
1333 UNPACK_PUSH(DBL2NUM(tmp));
1335 PACK_ITEM_ADJUST();
1336 break;
1338 case 'g':
1339 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1340 while (len-- > 0) {
1341 FLOAT_CONVWITH(tmp);
1342 UNPACK_FETCH(tmp.buf, float);
1343 NTOHF(tmp);
1344 UNPACK_PUSH(DBL2NUM(tmp.f));
1346 PACK_ITEM_ADJUST();
1347 break;
1349 case 'G':
1350 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1351 while (len-- > 0) {
1352 DOUBLE_CONVWITH(tmp);
1353 UNPACK_FETCH(tmp.buf, double);
1354 NTOHD(tmp);
1355 UNPACK_PUSH(DBL2NUM(tmp.d));
1357 PACK_ITEM_ADJUST();
1358 break;
1360 case 'U':
1361 if (len > send - s) len = send - s;
1362 while (len > 0 && s < send) {
1363 long alen = send - s;
1364 unsigned long l;
1366 l = utf8_to_uv(s, &alen);
1367 s += alen; len--;
1368 UNPACK_PUSH(ULONG2NUM(l));
1370 break;
1372 case 'u':
1374 VALUE buf = rb_str_new(0, (send - s)*3/4);
1375 char *ptr = RSTRING_PTR(buf);
1376 long total = 0;
1378 while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
1379 long a,b,c,d;
1380 char hunk[3];
1382 len = ((unsigned char)*s++ - ' ') & 077;
1384 total += len;
1385 if (total > RSTRING_LEN(buf)) {
1386 len -= total - RSTRING_LEN(buf);
1387 total = RSTRING_LEN(buf);
1390 while (len > 0) {
1391 long mlen = len > 3 ? 3 : len;
1393 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1394 a = ((unsigned char)*s++ - ' ') & 077;
1395 else
1396 a = 0;
1397 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1398 b = ((unsigned char)*s++ - ' ') & 077;
1399 else
1400 b = 0;
1401 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1402 c = ((unsigned char)*s++ - ' ') & 077;
1403 else
1404 c = 0;
1405 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1406 d = ((unsigned char)*s++ - ' ') & 077;
1407 else
1408 d = 0;
1409 hunk[0] = (char)(a << 2 | b >> 4);
1410 hunk[1] = (char)(b << 4 | c >> 2);
1411 hunk[2] = (char)(c << 6 | d);
1412 memcpy(ptr, hunk, mlen);
1413 ptr += mlen;
1414 len -= mlen;
1416 if (s < send && (unsigned char)*s != '\r' && *s != '\n')
1417 s++; /* possible checksum byte */
1418 if (s < send && *s == '\r') s++;
1419 if (s < send && *s == '\n') s++;
1422 rb_str_set_len(buf, total);
1423 UNPACK_PUSH(buf);
1425 break;
1427 case 'm':
1429 VALUE buf = rb_str_new(0, (send - s + 3)*3/4); /* +3 is for skipping paddings */
1430 char *ptr = RSTRING_PTR(buf);
1431 int a = -1,b = -1,c = 0,d = 0;
1432 static signed char b64_xtable[256];
1434 if (b64_xtable['/'] <= 0) {
1435 int i;
1437 for (i = 0; i < 256; i++) {
1438 b64_xtable[i] = -1;
1440 for (i = 0; i < 64; i++) {
1441 b64_xtable[(unsigned char)b64_table[i]] = (char)i;
1444 if (len == 0) {
1445 while (s < send) {
1446 a = b = c = d = -1;
1447 a = b64_xtable[(unsigned char)*s++];
1448 if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
1449 b = b64_xtable[(unsigned char)*s++];
1450 if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
1451 if (*s == '=') {
1452 if (s + 2 == send && *(s + 1) == '=') break;
1453 rb_raise(rb_eArgError, "invalid base64");
1455 c = b64_xtable[(unsigned char)*s++];
1456 if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
1457 if (s + 1 == send && *s == '=') break;
1458 d = b64_xtable[(unsigned char)*s++];
1459 if (d == -1) rb_raise(rb_eArgError, "invalid base64");
1460 *ptr++ = castchar(a << 2 | b >> 4);
1461 *ptr++ = castchar(b << 4 | c >> 2);
1462 *ptr++ = castchar(c << 6 | d);
1464 if (c == -1) {
1465 *ptr++ = castchar(a << 2 | b >> 4);
1466 if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
1468 else if (d == -1) {
1469 *ptr++ = castchar(a << 2 | b >> 4);
1470 *ptr++ = castchar(b << 4 | c >> 2);
1471 if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
1474 else {
1475 while (s < send) {
1476 a = b = c = d = -1;
1477 while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1478 if (s >= send) break;
1479 s++;
1480 while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1481 if (s >= send) break;
1482 s++;
1483 while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1484 if (*s == '=' || s >= send) break;
1485 s++;
1486 while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1487 if (*s == '=' || s >= send) break;
1488 s++;
1489 *ptr++ = castchar(a << 2 | b >> 4);
1490 *ptr++ = castchar(b << 4 | c >> 2);
1491 *ptr++ = castchar(c << 6 | d);
1492 a = -1;
1494 if (a != -1 && b != -1) {
1495 if (c == -1)
1496 *ptr++ = castchar(a << 2 | b >> 4);
1497 else {
1498 *ptr++ = castchar(a << 2 | b >> 4);
1499 *ptr++ = castchar(b << 4 | c >> 2);
1503 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1504 UNPACK_PUSH(buf);
1506 break;
1508 case 'M':
1510 VALUE buf = rb_str_new(0, send - s);
1511 char *ptr = RSTRING_PTR(buf), *ss = s;
1512 int csum = 0;
1513 int c1, c2;
1515 while (s < send) {
1516 if (*s == '=') {
1517 if (++s == send) break;
1518 if (s+1 < send && *s == '\r' && *(s+1) == '\n')
1519 s++;
1520 if (*s != '\n') {
1521 if ((c1 = hex2num(*s)) == -1) break;
1522 if (++s == send) break;
1523 if ((c2 = hex2num(*s)) == -1) break;
1524 csum |= *ptr++ = castchar(c1 << 4 | c2);
1527 else {
1528 csum |= *ptr++ = *s;
1530 s++;
1531 ss = s;
1533 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1534 rb_str_buf_cat(buf, ss, send-ss);
1535 csum = ISASCII(csum) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
1536 ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum);
1537 UNPACK_PUSH(buf);
1539 break;
1541 case '@':
1542 if (len > RSTRING_LEN(str))
1543 rb_raise(rb_eArgError, "@ outside of string");
1544 s = RSTRING_PTR(str) + len;
1545 break;
1547 case 'X':
1548 if (len > s - RSTRING_PTR(str))
1549 rb_raise(rb_eArgError, "X outside of string");
1550 s -= len;
1551 break;
1553 case 'x':
1554 if (len > send - s)
1555 rb_raise(rb_eArgError, "x outside of string");
1556 s += len;
1557 break;
1559 case 'P':
1560 if (sizeof(char *) <= (size_t)(send - s)) {
1561 VALUE tmp = Qnil;
1562 char *t;
1564 UNPACK_FETCH(&t, char *);
1565 if (t) {
1566 if (!associates) associates = str_associated(str);
1567 tmp = associated_pointer(associates, t);
1568 if (len < RSTRING_LEN(tmp)) {
1569 tmp = rb_str_new(t, len);
1570 str_associate(tmp, associates);
1573 UNPACK_PUSH(tmp);
1575 break;
1577 case 'p':
1578 if (len > (long)((send - s) / sizeof(char *)))
1579 len = (send - s) / sizeof(char *);
1580 while (len-- > 0) {
1581 if ((size_t)(send - s) < sizeof(char *))
1582 break;
1583 else {
1584 VALUE tmp = Qnil;
1585 char *t;
1587 UNPACK_FETCH(&t, char *);
1588 if (t) {
1589 if (!associates) associates = str_associated(str);
1590 tmp = associated_pointer(associates, t);
1592 UNPACK_PUSH(tmp);
1595 break;
1597 case 'w':
1599 char *s0 = s;
1600 while (len > 0 && s < send) {
1601 if (*s & 0x80) {
1602 s++;
1604 else {
1605 s++;
1606 UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
1607 len--;
1608 s0 = s;
1612 break;
1614 default:
1615 unknown_directive("unpack", type, fmt);
1616 break;
1620 return ary;
1623 static VALUE
1624 pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1626 int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
1627 return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset));
1630 static VALUE
1631 pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1633 return pack_unpack_internal(str, fmt, UNPACK_1, RB_NUM2LONG(offset));
1637 rb_uv_to_utf8(char buf[6], unsigned long uv)
1639 if (uv <= 0x7f) {
1640 buf[0] = (char)uv;
1641 return 1;
1643 if (uv <= 0x7ff) {
1644 buf[0] = castchar(((uv>>6)&0xff)|0xc0);
1645 buf[1] = castchar((uv&0x3f)|0x80);
1646 return 2;
1648 if (uv <= 0xffff) {
1649 buf[0] = castchar(((uv>>12)&0xff)|0xe0);
1650 buf[1] = castchar(((uv>>6)&0x3f)|0x80);
1651 buf[2] = castchar((uv&0x3f)|0x80);
1652 return 3;
1654 if (uv <= 0x1fffff) {
1655 buf[0] = castchar(((uv>>18)&0xff)|0xf0);
1656 buf[1] = castchar(((uv>>12)&0x3f)|0x80);
1657 buf[2] = castchar(((uv>>6)&0x3f)|0x80);
1658 buf[3] = castchar((uv&0x3f)|0x80);
1659 return 4;
1661 if (uv <= 0x3ffffff) {
1662 buf[0] = castchar(((uv>>24)&0xff)|0xf8);
1663 buf[1] = castchar(((uv>>18)&0x3f)|0x80);
1664 buf[2] = castchar(((uv>>12)&0x3f)|0x80);
1665 buf[3] = castchar(((uv>>6)&0x3f)|0x80);
1666 buf[4] = castchar((uv&0x3f)|0x80);
1667 return 5;
1669 if (uv <= 0x7fffffff) {
1670 buf[0] = castchar(((uv>>30)&0xff)|0xfc);
1671 buf[1] = castchar(((uv>>24)&0x3f)|0x80);
1672 buf[2] = castchar(((uv>>18)&0x3f)|0x80);
1673 buf[3] = castchar(((uv>>12)&0x3f)|0x80);
1674 buf[4] = castchar(((uv>>6)&0x3f)|0x80);
1675 buf[5] = castchar((uv&0x3f)|0x80);
1676 return 6;
1678 rb_raise(rb_eRangeError, "pack(U): value out of range");
1680 UNREACHABLE_RETURN(Qnil);
/*
 * Lower bounds used by utf8_to_uv() to reject over-long encodings:
 * utf8_limits[len - 1] is compared against the value decoded from a
 * +len+ byte sequence, and a smaller value is reported as redundant.
 */
static const unsigned long utf8_limits[] = {
    0UL,        /* 1 byte  */
    1UL << 7,   /* 2 bytes: 0x80 */
    1UL << 11,  /* 3 bytes: 0x800 */
    1UL << 16,  /* 4 bytes: 0x10000 */
    1UL << 21,  /* 5 bytes: 0x200000 */
    1UL << 26,  /* 6 bytes: 0x4000000 */
    1UL << 31,  /* 7 bytes: 0x80000000 */
};

1693 static unsigned long
1694 utf8_to_uv(const char *p, long *lenp)
1696 int c = *p++ & 0xff;
1697 unsigned long uv = c;
1698 long n;
1700 if (!(uv & 0x80)) {
1701 *lenp = 1;
1702 return uv;
1704 if (!(uv & 0x40)) {
1705 *lenp = 1;
1706 rb_raise(rb_eArgError, "malformed UTF-8 character");
1709 if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
1710 else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
1711 else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
1712 else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
1713 else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
1714 else {
1715 *lenp = 1;
1716 rb_raise(rb_eArgError, "malformed UTF-8 character");
1718 if (n > *lenp) {
1719 rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
1720 n, *lenp);
1722 *lenp = n--;
1723 if (n != 0) {
1724 while (n--) {
1725 c = *p++ & 0xff;
1726 if ((c & 0xc0) != 0x80) {
1727 *lenp -= n + 1;
1728 rb_raise(rb_eArgError, "malformed UTF-8 character");
1730 else {
1731 c &= 0x3f;
1732 uv = uv << 6 | c;
1736 n = *lenp - 1;
1737 if (uv < utf8_limits[n]) {
1738 rb_raise(rb_eArgError, "redundant UTF-8 sequence");
1740 return uv;
1743 #include "pack.rbinc"
1745 void
1746 Init_pack(void)
1748 id_associated = rb_make_internal_id();