* io.c (rb_open_file): encoding in mode string was ignored if perm is
[ruby-svn.git] / pack.c
blob8f26365d7ccc9efe46a84f02f3ed93cb92c4ac57
1 /**********************************************************************
3 pack.c -
5 $Author$
6 created at: Thu Feb 10 15:17:05 JST 1994
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
10 **********************************************************************/
12 #include "ruby/ruby.h"
13 #include <sys/types.h>
14 #include <ctype.h>
15 #include <errno.h>
17 #define SIZE16 2
18 #define SIZE32 4
20 #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4
21 # define NATINT_PACK
22 #endif
24 #ifdef NATINT_PACK
25 # define OFF16B(p) ((char*)(p) + (natint?0:(sizeof(short) - SIZE16)))
26 # define OFF32B(p) ((char*)(p) + (natint?0:(sizeof(long) - SIZE32)))
27 # define NATINT_LEN(type,len) (natint?sizeof(type):(len))
28 # ifdef WORDS_BIGENDIAN
29 # define OFF16(p) OFF16B(p)
30 # define OFF32(p) OFF32B(p)
31 # endif
32 # define NATINT_HTOVS(x) (natint?htovs(x):htov16(x))
33 # define NATINT_HTOVL(x) (natint?htovl(x):htov32(x))
34 # define NATINT_HTONS(x) (natint?htons(x):hton16(x))
35 # define NATINT_HTONL(x) (natint?htonl(x):hton32(x))
36 #else
37 # define NATINT_LEN(type,len) sizeof(type)
38 # define NATINT_HTOVS(x) htovs(x)
39 # define NATINT_HTOVL(x) htovl(x)
40 # define NATINT_HTONS(x) htons(x)
41 # define NATINT_HTONL(x) htonl(x)
42 #endif
44 #ifndef OFF16
45 # define OFF16(p) (char*)(p)
46 # define OFF32(p) (char*)(p)
47 #endif
48 #ifndef OFF16B
49 # define OFF16B(p) (char*)(p)
50 # define OFF32B(p) (char*)(p)
51 #endif
/*
 * define_swapx(x, xtype) expands to the definition of a static function
 * named swap<x> that takes an xtype by value and returns it with its
 * bytes in reverse order.
 *
 * The reversal goes through unsigned char pointers into the argument and
 * the result, so no heap allocation is needed and no object is read
 * through a pointer of an incompatible type.
 */
#define define_swapx(x, xtype)				\
static xtype						\
TOKEN_PASTE(swap,x)(xtype z)				\
{							\
    xtype r;						\
    const unsigned char *src = (const unsigned char *)&z; \
    unsigned char *dst = (unsigned char *)&r;		\
    size_t i;						\
							\
    for (i = 0; i < sizeof(xtype); i++) {		\
	dst[sizeof(xtype)-i-1] = src[i];		\
    }							\
    return r;						\
}
75 #ifndef swap16
76 #define swap16(x) ((((x)&0xFF)<<8) | (((x)>>8)&0xFF))
77 #endif
78 #if SIZEOF_SHORT == 2
79 #define swaps(x) swap16(x)
80 #else
81 #if SIZEOF_SHORT == 4
82 #define swaps(x) ((((x)&0xFF)<<24) \
83 |(((x)>>24)&0xFF) \
84 |(((x)&0x0000FF00)<<8) \
85 |(((x)&0x00FF0000)>>8) )
86 #else
87 define_swapx(s,short)
88 #endif
89 #endif
91 #ifndef swap32
92 #define swap32(x) ((((x)&0xFF)<<24) \
93 |(((x)>>24)&0xFF) \
94 |(((x)&0x0000FF00)<<8) \
95 |(((x)&0x00FF0000)>>8) )
96 #endif
97 #if SIZEOF_LONG == 4
98 #define swapl(x) swap32(x)
99 #else
100 #if SIZEOF_LONG == 8
101 #define swapl(x) ((((x)&0x00000000000000FF)<<56) \
102 |(((x)&0xFF00000000000000)>>56) \
103 |(((x)&0x000000000000FF00)<<40) \
104 |(((x)&0x00FF000000000000)>>40) \
105 |(((x)&0x0000000000FF0000)<<24) \
106 |(((x)&0x0000FF0000000000)>>24) \
107 |(((x)&0x00000000FF000000)<<8) \
108 |(((x)&0x000000FF00000000)>>8))
109 #else
110 define_swapx(l,long)
111 #endif
112 #endif
114 #if SIZEOF_FLOAT == 4
115 #if SIZEOF_LONG == 4 /* SIZEOF_FLOAT == 4 == SIZEOF_LONG */
116 #define swapf(x) swapl(x)
117 #define FLOAT_SWAPPER unsigned long
118 #else
119 #if SIZEOF_SHORT == 4 /* SIZEOF_FLOAT == 4 == SIZEOF_SHORT */
120 #define swapf(x) swaps(x)
121 #define FLOAT_SWAPPER unsigned short
122 #else /* SIZEOF_FLOAT == 4 but undivide by known size of int */
123 define_swapx(f,float)
124 #endif /* #if SIZEOF_SHORT == 4 */
125 #endif /* #if SIZEOF_LONG == 4 */
126 #else /* SIZEOF_FLOAT != 4 */
127 define_swapx(f,float)
128 #endif /* #if SIZEOF_FLOAT == 4 */
130 #if SIZEOF_DOUBLE == 8
131 #if SIZEOF_LONG == 8 /* SIZEOF_DOUBLE == 8 == SIZEOF_LONG */
132 #define swapd(x) swapl(x)
133 #define DOUBLE_SWAPPER unsigned long
134 #else
135 #if SIZEOF_LONG == 4 /* SIZEOF_DOUBLE == 8 && 4 == SIZEOF_LONG */
/*
 * Byte-reverse a double on platforms where it is the size of two longs:
 * the two halves trade places and each half is byte-swapped with swapl().
 */
static double
swapd(const double d)
{
    unsigned long halves[2] = {0, 0};
    unsigned long first;
    double result = d;

    memcpy(halves, &result, sizeof(double));
    first = halves[0];
    halves[0] = swapl(halves[1]);
    halves[1] = swapl(first);
    memcpy(&result, halves, sizeof(double));
    return result;
}
151 #else
152 #if SIZEOF_SHORT == 4 /* SIZEOF_DOUBLE == 8 && 4 == SIZEOF_SHORT */
/*
 * Byte-reverse a double on platforms where it is the size of two shorts:
 * the two halves trade places and each half is byte-swapped with swaps().
 */
static double
swapd(const double d)
{
    unsigned short halves[2] = {0, 0};
    unsigned short first;
    double result = d;

    memcpy(halves, &result, sizeof(double));
    first = halves[0];
    halves[0] = swaps(halves[1]);
    halves[1] = swaps(first);
    memcpy(&result, halves, sizeof(double));
    return result;
}
168 #else /* SIZEOF_DOUBLE == 8 but undivied by known size of int */
169 define_swapx(d, double)
170 #endif /* #if SIZEOF_SHORT == 4 */
171 #endif /* #if SIZEOF_LONG == 4 */
172 #endif /* #if SIZEOF_LONG == 8 */
173 #else /* SIZEOF_DOUBLE != 8 */
174 define_swapx(d, double)
175 #endif /* #if SIZEOF_DOUBLE == 8 */
177 #undef define_swapx
179 #ifdef DYNAMIC_ENDIAN
180 #ifdef ntohs
181 #undef ntohs
182 #undef ntohl
183 #undef htons
184 #undef htonl
185 #endif
/*
 * Run-time byte-order probe.  Returns 1 when the first byte in memory of
 * an int holding 1 is zero (most significant byte first) and 0 otherwise.
 * The answer is computed on the first call and cached for later calls.
 */
static int
endian(void)
{
    static int probed = 0;
    static int cached;

    if (!probed) {
	const char *first_byte;

	/* the flag doubles as the probe value: it now holds the int 1 */
	probed = 1;
	first_byte = (const char *)&probed;
	cached = (*first_byte != 0) ? 0 : 1;
    }
    return cached;
}
199 #define ntohs(x) (endian()?(x):swaps(x))
200 #define ntohl(x) (endian()?(x):swapl(x))
201 #define ntohf(x) (endian()?(x):swapf(x))
202 #define ntohd(x) (endian()?(x):swapd(x))
203 #define htons(x) (endian()?(x):swaps(x))
204 #define htonl(x) (endian()?(x):swapl(x))
205 #define htonf(x) (endian()?(x):swapf(x))
206 #define htond(x) (endian()?(x):swapd(x))
207 #define htovs(x) (endian()?swaps(x):(x))
208 #define htovl(x) (endian()?swapl(x):(x))
209 #define htovf(x) (endian()?swapf(x):(x))
210 #define htovd(x) (endian()?swapd(x):(x))
211 #define vtohs(x) (endian()?swaps(x):(x))
212 #define vtohl(x) (endian()?swapl(x):(x))
213 #define vtohf(x) (endian()?swapf(x):(x))
214 #define vtohd(x) (endian()?swapd(x):(x))
215 # ifdef NATINT_PACK
216 #define htov16(x) (endian()?swap16(x):(x))
217 #define htov32(x) (endian()?swap32(x):(x))
218 #define hton16(x) (endian()?(x):swap16(x))
219 #define hton32(x) (endian()?(x):swap32(x))
220 # endif
221 #else
222 #ifdef WORDS_BIGENDIAN
223 #ifndef ntohs
224 #define ntohs(x) (x)
225 #define ntohl(x) (x)
226 #define htons(x) (x)
227 #define htonl(x) (x)
228 #endif
229 #define ntohf(x) (x)
230 #define ntohd(x) (x)
231 #define htonf(x) (x)
232 #define htond(x) (x)
233 #define htovs(x) swaps(x)
234 #define htovl(x) swapl(x)
235 #define htovf(x) swapf(x)
236 #define htovd(x) swapd(x)
237 #define vtohs(x) swaps(x)
238 #define vtohl(x) swapl(x)
239 #define vtohf(x) swapf(x)
240 #define vtohd(x) swapd(x)
241 # ifdef NATINT_PACK
242 #define htov16(x) swap16(x)
243 #define htov32(x) swap32(x)
244 #define hton16(x) (x)
245 #define hton32(x) (x)
246 # endif
247 #else /* LITTLE ENDIAN */
248 #ifdef ntohs
249 #undef ntohs
250 #undef ntohl
251 #undef htons
252 #undef htonl
253 #endif
254 #define ntohs(x) swaps(x)
255 #define ntohl(x) swapl(x)
256 #define htons(x) swaps(x)
257 #define htonl(x) swapl(x)
258 #define ntohf(x) swapf(x)
259 #define ntohd(x) swapd(x)
260 #define htonf(x) swapf(x)
261 #define htond(x) swapd(x)
262 #define htovs(x) (x)
263 #define htovl(x) (x)
264 #define htovf(x) (x)
265 #define htovd(x) (x)
266 #define vtohs(x) (x)
267 #define vtohl(x) (x)
268 #define vtohf(x) (x)
269 #define vtohd(x) (x)
270 # ifdef NATINT_PACK
271 #define htov16(x) (x)
272 #define htov32(x) (x)
273 #define hton16(x) swap16(x)
274 #define hton32(x) swap32(x)
275 # endif
276 #endif
277 #endif
/*
 * Float byte-order conversion helpers.
 *
 * When FLOAT_SWAPPER names an integer type of the same size as float,
 * the float x is copied into the scratch integer y (declared with
 * FLOAT_CONVWITH), converted with the integer macro, copied back into x,
 * and the whole expression evaluates to the converted x.  Otherwise the
 * float-typed conversion is applied directly and y is unused.
 */
#ifdef FLOAT_SWAPPER
#define FLOAT_CONVWITH(y)	FLOAT_SWAPPER y;
#define HTONF(x,y)	(memcpy(&y,&x,sizeof(float)),	\
			 y = htonf((FLOAT_SWAPPER)y),	\
			 memcpy(&x,&y,sizeof(float)),	\
			 x)
#define HTOVF(x,y)	(memcpy(&y,&x,sizeof(float)),	\
			 y = htovf((FLOAT_SWAPPER)y),	\
			 memcpy(&x,&y,sizeof(float)),	\
			 x)
#define NTOHF(x,y)	(memcpy(&y,&x,sizeof(float)),	\
			 y = ntohf((FLOAT_SWAPPER)y),	\
			 memcpy(&x,&y,sizeof(float)),	\
			 x)
#define VTOHF(x,y)	(memcpy(&y,&x,sizeof(float)),	\
			 y = vtohf((FLOAT_SWAPPER)y),	\
			 memcpy(&x,&y,sizeof(float)),	\
			 x)
#else
#define FLOAT_CONVWITH(y)
#define HTONF(x,y)	htonf(x)
#define HTOVF(x,y)	htovf(x)
#define NTOHF(x,y)	ntohf(x)
#define VTOHF(x,y)	vtohf(x)
#endif
/*
 * Double byte-order conversion helpers.
 *
 * When DOUBLE_SWAPPER names an integer type of the same size as double,
 * the double x is copied into the scratch integer y (declared with
 * DOUBLE_CONVWITH), converted with the integer macro, copied back into
 * x, and the whole expression evaluates to the converted x.  Otherwise
 * the double-typed conversion is applied directly and y is unused.
 */
#ifdef DOUBLE_SWAPPER
#define DOUBLE_CONVWITH(y)	DOUBLE_SWAPPER y;
#define HTOND(x,y)	(memcpy(&y,&x,sizeof(double)),	\
			 y = htond((DOUBLE_SWAPPER)y),	\
			 memcpy(&x,&y,sizeof(double)),	\
			 x)
#define HTOVD(x,y)	(memcpy(&y,&x,sizeof(double)),	\
			 y = htovd((DOUBLE_SWAPPER)y),	\
			 memcpy(&x,&y,sizeof(double)),	\
			 x)
#define NTOHD(x,y)	(memcpy(&y,&x,sizeof(double)),	\
			 y = ntohd((DOUBLE_SWAPPER)y),	\
			 memcpy(&x,&y,sizeof(double)),	\
			 x)
#define VTOHD(x,y)	(memcpy(&y,&x,sizeof(double)),	\
			 y = vtohd((DOUBLE_SWAPPER)y),	\
			 memcpy(&x,&y,sizeof(double)),	\
			 x)
#else
#define DOUBLE_CONVWITH(y)
#define HTOND(x,y)	htond(x)
#define HTOVD(x,y)	htovd(x)
#define NTOHD(x,y)	ntohd(x)
#define VTOHD(x,y)	vtohd(x)
#endif
331 unsigned long rb_big2ulong_pack(VALUE x);
333 static unsigned long
334 num2i32(VALUE x)
336 x = rb_to_int(x); /* is nil OK? (should not) */
338 if (FIXNUM_P(x)) return FIX2LONG(x);
339 if (TYPE(x) == T_BIGNUM) {
340 return rb_big2ulong_pack(x);
342 rb_raise(rb_eTypeError, "can't convert %s to `integer'", rb_obj_classname(x));
343 return 0; /* not reached */
346 #if SIZEOF_LONG == SIZE32
347 # define EXTEND32(x)
348 #else
349 /* invariant in modulo 1<<31 */
350 # define EXTEND32(x) do { if (!natint) {(x) = (((1L<<31)-1-(x))^~(~0L<<31));}} while(0)
351 #endif
352 #if SIZEOF_SHORT == SIZE16
353 # define EXTEND16(x)
354 #else
355 # define EXTEND16(x) do { if (!natint) {(x) = (short)(((1<<15)-1-(x))^~(~0<<15));}} while(0)
356 #endif
358 #ifdef HAVE_LONG_LONG
359 # define QUAD_SIZE sizeof(LONG_LONG)
360 #else
361 # define QUAD_SIZE 8
362 #endif
363 static const char toofew[] = "too few arguments";
365 static void encodes(VALUE,const char*,long,int);
366 static void qpencode(VALUE,VALUE,long);
368 static unsigned long utf8_to_uv(const char*,long*);
371 * call-seq:
372 * arr.pack ( aTemplateString ) -> aBinaryString
374 * Packs the contents of <i>arr</i> into a binary sequence according to
375 * the directives in <i>aTemplateString</i> (see the table below).
376 * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count,
377 * which gives the width of the resulting field. The remaining
378 * directives also may take a count, indicating the number of array
379 * elements to convert. If the count is an asterisk
380 * (``<code>*</code>''), all remaining array elements will be
381 * converted. Any of the directives ``<code>sSiIlL</code>'' may be
382 * followed by an underscore (``<code>_</code>'') to use the underlying
383 * platform's native size for the specified type; otherwise, they use a
384 * platform-independent size. Spaces are ignored in the template
385 * string. See also <code>String#unpack</code>.
387 * a = [ "a", "b", "c" ]
388 * n = [ 65, 66, 67 ]
389 * a.pack("A3A3A3") #=> "a b c "
390 * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000"
391 * n.pack("ccc") #=> "ABC"
393 * Directives for +pack+.
395 * Directive Meaning
396 * ---------------------------------------------------------------
397 * @ | Moves to absolute position
398 * A | arbitrary binary string (space padded, count is width)
399 * a | arbitrary binary string (null padded, count is width)
400 * B | Bit string (descending bit order)
401 * b | Bit string (ascending bit order)
402 * C | Unsigned byte (C unsigned char)
403 * c | Byte (C char)
404 * D, d | Double-precision float, native format
405 * E | Double-precision float, little-endian byte order
406 * e | Single-precision float, little-endian byte order
407 * F, f | Single-precision float, native format
408 * G | Double-precision float, network (big-endian) byte order
409 * g | Single-precision float, network (big-endian) byte order
410 * H | Hex string (high nibble first)
411 * h | Hex string (low nibble first)
412 * I | Unsigned integer
413 * i | Integer
414 * L | Unsigned long
415 * l | Long
416 * M | Quoted printable, MIME encoding (see RFC2045)
417 * m | Base64 encoded string
418 * N | Long, network (big-endian) byte order
419 * n | Short, network (big-endian) byte-order
420 * P | Pointer to a structure (fixed-length string)
421 * p | Pointer to a null-terminated string
422 * Q, q | 64-bit number
423 * S | Unsigned short
424 * s | Short
425 * U | UTF-8
426 * u | UU-encoded string
427 * V | Long, little-endian byte order
428 * v | Short, little-endian byte order
429 * w | BER-compressed integer
430 * X | Back up a byte
431 * x | Null byte
432 * Z | Same as ``a'', except that null is added with *
/*
 * Array#pack implementation.  Scans the template string fmt one
 * directive at a time; each directive takes zero or more elements from
 * ary (via NEXTFROM/THISFROM) and appends their encoded bytes to res,
 * which is returned.  Running out of elements raises ArgumentError with
 * the "too few arguments" message.
 */
435 static VALUE
436 pack_pack(VALUE ary, VALUE fmt)
438 static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
439 static const char spc10[] = " ";
440 const char *p, *pend;
441 VALUE res, from, associates = 0;
442 char type;
443 long items, len, idx, plen;
444 const char *ptr;
445 #ifdef NATINT_PACK
446 int natint; /* native integer */
447 #endif
449 StringValue(fmt);
450 p = RSTRING_PTR(fmt);
451 pend = p + RSTRING_LEN(fmt);
452 res = rb_str_buf_new(0);
454 items = RARRAY_LEN(ary);
455 idx = 0;
/* Element accessors: THISFROM reads the current element without
 * consuming it, NEXTFROM consumes it; both raise once items runs out. */
457 #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
458 #define THISFROM (items > 0 ? RARRAY_PTR(ary)[idx] : TOO_FEW)
459 #define NEXTFROM (items-- > 0 ? RARRAY_PTR(ary)[idx++] : TOO_FEW)
461 while (p < pend) {
/* pend was computed once up front; give up if fmt no longer ends there */
462 if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
463 rb_raise(rb_eRuntimeError, "format string modified");
465 type = *p++; /* get data type */
466 #ifdef NATINT_PACK
467 natint = 0;
468 #endif
470 if (ISSPACE(type)) continue;
/* '#' starts a comment that runs to the end of the line */
471 if (type == '#') {
472 while ((p < pend) && (*p != '\n')) {
473 p++;
475 continue;
/* '_' or '!' suffix is accepted only after one of sSiIlL */
477 if (*p == '_' || *p == '!') {
478 static const char natstr[] = "sSiIlL";
480 if (strchr(natstr, type)) {
481 #ifdef NATINT_PACK
482 natint = 1;
483 #endif
484 p++;
486 else {
487 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
/* count: '*' means every remaining element (0 for @, X, x and u),
 * a decimal number is taken literally, and the default is 1 */
490 if (*p == '*') { /* set data length */
491 len = strchr("@Xxu", type) ? 0 : items;
492 p++;
494 else if (ISDIGIT(*p)) {
495 errno = 0;
496 len = STRTOUL(p, (char**)&p, 10);
497 if (errno) {
498 rb_raise(rb_eRangeError, "pack length too big");
501 else {
502 len = 1;
505 switch (type) {
/* string-like directives: one element, nil is treated as "" */
506 case 'A': case 'a': case 'Z':
507 case 'B': case 'b':
508 case 'H': case 'h':
509 from = NEXTFROM;
510 if (NIL_P(from)) {
511 ptr = "";
512 plen = 0;
514 else {
515 StringValue(from);
516 ptr = RSTRING_PTR(from);
517 plen = RSTRING_LEN(from);
518 OBJ_INFECT(res, from);
/* with '*', the field width is the length of the source string */
521 if (p[-1] == '*')
522 len = plen;
524 switch (type) {
525 case 'a': /* arbitrary binary string (null padded) */
526 case 'A': /* arbitrary binary string (ASCII space padded) */
527 case 'Z': /* null terminated string */
528 if (plen >= len) {
529 rb_str_buf_cat(res, ptr, len);
530 if (p[-1] == '*' && type == 'Z')
531 rb_str_buf_cat(res, nul10, 1);
533 else {
534 rb_str_buf_cat(res, ptr, plen);
535 len -= plen;
536 while (len >= 10) {
537 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
538 len -= 10;
540 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
542 break;
544 case 'b': /* bit string (ascending) */
546 int byte = 0;
547 long i, j = 0;
549 if (len > plen) {
550 j = (len - plen + 1)/2;
551 len = plen;
553 for (i=0; i++ < len; ptr++) {
554 if (*ptr & 1)
555 byte |= 128;
556 if (i & 7)
557 byte >>= 1;
558 else {
559 char c = byte & 0xff;
560 rb_str_buf_cat(res, &c, 1);
561 byte = 0;
564 if (len & 7) {
565 char c;
566 byte >>= 7 - (len & 7);
567 c = byte & 0xff;
568 rb_str_buf_cat(res, &c, 1);
570 len = j;
571 goto grow;
573 break;
575 case 'B': /* bit string (descending) */
577 int byte = 0;
578 long i, j = 0;
580 if (len > plen) {
581 j = (len - plen + 1)/2;
582 len = plen;
584 for (i=0; i++ < len; ptr++) {
585 byte |= *ptr & 1;
586 if (i & 7)
587 byte <<= 1;
588 else {
589 char c = byte & 0xff;
590 rb_str_buf_cat(res, &c, 1);
591 byte = 0;
594 if (len & 7) {
595 char c;
596 byte <<= 7 - (len & 7);
597 c = byte & 0xff;
598 rb_str_buf_cat(res, &c, 1);
600 len = j;
601 goto grow;
603 break;
605 case 'h': /* hex string (low nibble first) */
607 int byte = 0;
608 long i, j = 0;
610 if (len > plen) {
611 j = (len - plen + 1)/2;
612 len = plen;
614 for (i=0; i++ < len; ptr++) {
615 if (ISALPHA(*ptr))
616 byte |= (((*ptr & 15) + 9) & 15) << 4;
617 else
618 byte |= (*ptr & 15) << 4;
619 if (i & 1)
620 byte >>= 4;
621 else {
622 char c = byte & 0xff;
623 rb_str_buf_cat(res, &c, 1);
624 byte = 0;
627 if (len & 1) {
628 char c = byte & 0xff;
629 rb_str_buf_cat(res, &c, 1);
631 len = j;
632 goto grow;
634 break;
636 case 'H': /* hex string (high nibble first) */
638 int byte = 0;
639 long i, j = 0;
641 if (len > plen) {
642 j = (len - plen + 1)/2;
643 len = plen;
645 for (i=0; i++ < len; ptr++) {
646 if (ISALPHA(*ptr))
647 byte |= ((*ptr & 15) + 9) & 15;
648 else
649 byte |= *ptr & 15;
650 if (i & 1)
651 byte <<= 4;
652 else {
653 char c = byte & 0xff;
654 rb_str_buf_cat(res, &c, 1);
655 byte = 0;
658 if (len & 1) {
659 char c = byte & 0xff;
660 rb_str_buf_cat(res, &c, 1);
662 len = j;
663 goto grow;
665 break;
667 break;
669 case 'c': /* signed char */
670 case 'C': /* unsigned char */
671 while (len-- > 0) {
672 char c;
674 from = NEXTFROM;
675 c = num2i32(from);
676 rb_str_buf_cat(res, &c, sizeof(char));
678 break;
680 case 's': /* signed short */
681 case 'S': /* unsigned short */
682 while (len-- > 0) {
683 short s;
685 from = NEXTFROM;
686 s = num2i32(from);
687 rb_str_buf_cat(res, OFF16(&s), NATINT_LEN(short,2));
689 break;
691 case 'i': /* signed int */
692 case 'I': /* unsigned int */
693 while (len-- > 0) {
694 int i;
696 from = NEXTFROM;
697 i = num2i32(from);
698 rb_str_buf_cat(res, (char*)&i, sizeof(int));
700 break;
702 case 'l': /* signed long */
703 case 'L': /* unsigned long */
704 while (len-- > 0) {
705 long l;
707 from = NEXTFROM;
708 l = num2i32(from);
709 rb_str_buf_cat(res, OFF32(&l), NATINT_LEN(long,4));
711 break;
713 case 'q': /* signed quad (64bit) int */
714 case 'Q': /* unsigned quad (64bit) int */
715 while (len-- > 0) {
716 char tmp[QUAD_SIZE];
718 from = NEXTFROM;
719 rb_quad_pack(tmp, from);
720 rb_str_buf_cat(res, (char*)&tmp, QUAD_SIZE);
722 break;
724 case 'n': /* unsigned short (network byte-order) */
725 while (len-- > 0) {
726 unsigned short s;
728 from = NEXTFROM;
729 s = num2i32(from);
730 s = NATINT_HTONS(s);
731 rb_str_buf_cat(res, OFF16(&s), NATINT_LEN(short,2));
733 break;
735 case 'N': /* unsigned long (network byte-order) */
736 while (len-- > 0) {
737 unsigned long l;
739 from = NEXTFROM;
740 l = num2i32(from);
741 l = NATINT_HTONL(l);
742 rb_str_buf_cat(res, OFF32(&l), NATINT_LEN(long,4));
744 break;
746 case 'v': /* unsigned short (VAX byte-order) */
747 while (len-- > 0) {
748 unsigned short s;
750 from = NEXTFROM;
751 s = num2i32(from);
752 s = NATINT_HTOVS(s);
753 rb_str_buf_cat(res, OFF16(&s), NATINT_LEN(short,2));
755 break;
757 case 'V': /* unsigned long (VAX byte-order) */
758 while (len-- > 0) {
759 unsigned long l;
761 from = NEXTFROM;
762 l = num2i32(from);
763 l = NATINT_HTOVL(l);
764 rb_str_buf_cat(res, OFF32(&l), NATINT_LEN(long,4));
766 break;
768 case 'f': /* single precision float in native format */
769 case 'F': /* ditto */
770 while (len-- > 0) {
771 float f;
773 from = NEXTFROM;
774 f = RFLOAT_VALUE(rb_Float(from));
775 rb_str_buf_cat(res, (char*)&f, sizeof(float));
777 break;
779 case 'e': /* single precision float in VAX byte-order */
780 while (len-- > 0) {
781 float f;
782 FLOAT_CONVWITH(ftmp);
784 from = NEXTFROM;
785 f = RFLOAT_VALUE(rb_Float(from));
786 f = HTOVF(f,ftmp);
787 rb_str_buf_cat(res, (char*)&f, sizeof(float));
789 break;
791 case 'E': /* double precision float in VAX byte-order */
792 while (len-- > 0) {
793 double d;
794 DOUBLE_CONVWITH(dtmp);
796 from = NEXTFROM;
797 d = RFLOAT_VALUE(rb_Float(from));
798 d = HTOVD(d,dtmp);
799 rb_str_buf_cat(res, (char*)&d, sizeof(double));
801 break;
803 case 'd': /* double precision float in native format */
804 case 'D': /* ditto */
805 while (len-- > 0) {
806 double d;
808 from = NEXTFROM;
809 d = RFLOAT_VALUE(rb_Float(from));
810 rb_str_buf_cat(res, (char*)&d, sizeof(double));
812 break;
814 case 'g': /* single precision float in network byte-order */
815 while (len-- > 0) {
816 float f;
817 FLOAT_CONVWITH(ftmp);
819 from = NEXTFROM;
820 f = RFLOAT_VALUE(rb_Float(from));
821 f = HTONF(f,ftmp);
822 rb_str_buf_cat(res, (char*)&f, sizeof(float));
824 break;
826 case 'G': /* double precision float in network byte-order */
827 while (len-- > 0) {
828 double d;
829 DOUBLE_CONVWITH(dtmp);
831 from = NEXTFROM;
832 d = RFLOAT_VALUE(rb_Float(from));
833 d = HTOND(d,dtmp);
834 rb_str_buf_cat(res, (char*)&d, sizeof(double));
836 break;
838 case 'x': /* null byte */
/* shared tail: the b/B/h/H cases and '@' jump here to append len NUL bytes */
839 grow:
840 while (len >= 10) {
841 rb_str_buf_cat(res, nul10, 10);
842 len -= 10;
844 rb_str_buf_cat(res, nul10, len);
845 break;
847 case 'X': /* back up byte */
/* also reached from '@' when the requested position lies before the current end */
848 shrink:
849 plen = RSTRING_LEN(res);
850 if (plen < len)
851 rb_raise(rb_eArgError, "X outside of string");
852 rb_str_set_len(res, plen - len);
853 break;
855 case '@': /* null fill to absolute position */
/* len becomes the signed distance from the current end to the target */
856 len -= RSTRING_LEN(res);
857 if (len > 0) goto grow;
858 len = -len;
859 if (len > 0) goto shrink;
860 break;
862 case '%':
863 rb_raise(rb_eArgError, "%% is not supported");
864 break;
866 case 'U': /* Unicode character */
867 while (len-- > 0) {
868 SIGNED_VALUE l;
869 char buf[8];
870 int le;
872 from = NEXTFROM;
873 from = rb_to_int(from);
874 l = NUM2LONG(from);
875 if (l < 0) {
876 rb_raise(rb_eRangeError, "pack(U): value out of range");
878 le = rb_uv_to_utf8(buf, l);
879 rb_str_buf_cat(res, (char*)buf, le);
881 break;
883 case 'u': /* uuencoded string */
884 case 'm': /* base64 encoded string */
885 from = NEXTFROM;
886 StringValue(from);
887 ptr = RSTRING_PTR(from);
888 plen = RSTRING_LEN(from);
/* the count is the number of input bytes per output line: rounded down
 * to a multiple of 3, with 45 used when it is 2 or less */
890 if (len <= 2)
891 len = 45;
892 else
893 len = len / 3 * 3;
894 while (plen > 0) {
895 long todo;
897 if (plen > len)
898 todo = len;
899 else
900 todo = plen;
901 encodes(res, ptr, todo, type);
902 plen -= todo;
903 ptr += todo;
905 break;
907 case 'M': /* quoted-printable encoded string */
908 from = rb_obj_as_string(NEXTFROM);
909 if (len <= 1)
910 len = 72;
911 qpencode(res, from, len);
912 break;
914 case 'P': /* pointer to packed byte string */
915 from = THISFROM;
916 if (!NIL_P(from)) {
917 StringValue(from);
918 if (RSTRING_LEN(from) < len) {
919 rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
920 RSTRING_LEN(from), len);
/* for 'P' the count was only a minimum buffer length; exactly one
 * pointer is emitted by the shared 'p' code below */
923 len = 1;
924 /* FALL THROUGH */
925 case 'p': /* pointer to string */
926 while (len-- > 0) {
927 char *t;
928 from = NEXTFROM;
929 if (NIL_P(from)) {
930 t = 0;
932 else {
933 t = StringValuePtr(from);
935 if (!associates) {
936 associates = rb_ary_new();
938 rb_ary_push(associates, from);
939 rb_obj_taint(from);
940 rb_str_buf_cat(res, (char*)&t, sizeof(char*));
942 break;
944 case 'w': /* BER compressed integer */
945 while (len-- > 0) {
946 unsigned long ul;
947 VALUE buf = rb_str_new(0, 0);
948 char c, *bufs, *bufe;
950 from = NEXTFROM;
/* peel 7-bit groups off a Bignum until the quotient is no longer a Bignum */
951 if (TYPE(from) == T_BIGNUM) {
952 VALUE big128 = rb_uint2big(128);
953 while (TYPE(from) == T_BIGNUM) {
954 from = rb_big_divmod(from, big128);
955 c = NUM2INT(RARRAY_PTR(from)[1]) | 0x80; /* mod */
956 rb_str_buf_cat(buf, &c, sizeof(char));
957 from = RARRAY_PTR(from)[0]; /* div */
962 long l = NUM2LONG(from);
963 if (l < 0) {
964 rb_raise(rb_eArgError, "can't compress negative numbers");
966 ul = l;
/* groups are collected least significant first, each with bit 7 set */
969 while (ul) {
970 c = ((ul & 0x7f) | 0x80);
971 rb_str_buf_cat(buf, &c, sizeof(char));
972 ul >>= 7;
975 if (RSTRING_LEN(buf)) {
976 bufs = RSTRING_PTR(buf);
977 bufe = bufs + RSTRING_LEN(buf) - 1;
978 *bufs &= 0x7f; /* clear continue bit */
979 while (bufs < bufe) { /* reverse */
980 c = *bufs;
981 *bufs++ = *bufe;
982 *bufe-- = c;
984 rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
986 else {
987 c = 0;
988 rb_str_buf_cat(res, &c, sizeof(char));
991 break;
993 default:
994 break;
/* attach the strings whose addresses were packed by 'p'/'P' to the result.
 * NOTE(review): presumably this keeps them alive as long as res is --
 * confirm against rb_str_associate. */
998 if (associates) {
999 rb_str_associate(res, associates);
1001 return res;
1004 static const char uu_table[] =
1005 "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1006 static const char b64_table[] =
1007 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1009 static void
1010 encodes(VALUE str, const char *s, long len, int type)
1012 char *buff = ALLOCA_N(char, len * 4 / 3 + 6);
1013 long i = 0;
1014 const char *trans = type == 'u' ? uu_table : b64_table;
1015 int padding;
1017 if (type == 'u') {
1018 buff[i++] = len + ' ';
1019 padding = '`';
1021 else {
1022 padding = '=';
1024 while (len >= 3) {
1025 buff[i++] = trans[077 & (*s >> 2)];
1026 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
1027 buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
1028 buff[i++] = trans[077 & s[2]];
1029 s += 3;
1030 len -= 3;
1032 if (len == 2) {
1033 buff[i++] = trans[077 & (*s >> 2)];
1034 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
1035 buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
1036 buff[i++] = padding;
1038 else if (len == 1) {
1039 buff[i++] = trans[077 & (*s >> 2)];
1040 buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
1041 buff[i++] = padding;
1042 buff[i++] = padding;
1044 buff[i++] = '\n';
1045 rb_str_buf_cat(str, buff, i);
1048 static const char hex_table[] = "0123456789ABCDEF";
1050 static void
1051 qpencode(VALUE str, VALUE from, long len)
1053 char buff[1024];
1054 long i = 0, n = 0, prev = EOF;
1055 unsigned char *s = (unsigned char*)RSTRING_PTR(from);
1056 unsigned char *send = s + RSTRING_LEN(from);
1058 while (s < send) {
1059 if ((*s > 126) ||
1060 (*s < 32 && *s != '\n' && *s != '\t') ||
1061 (*s == '=')) {
1062 buff[i++] = '=';
1063 buff[i++] = hex_table[*s >> 4];
1064 buff[i++] = hex_table[*s & 0x0f];
1065 n += 3;
1066 prev = EOF;
1068 else if (*s == '\n') {
1069 if (prev == ' ' || prev == '\t') {
1070 buff[i++] = '=';
1071 buff[i++] = *s;
1073 buff[i++] = *s;
1074 n = 0;
1075 prev = *s;
1077 else {
1078 buff[i++] = *s;
1079 n++;
1080 prev = *s;
1082 if (n > len) {
1083 buff[i++] = '=';
1084 buff[i++] = '\n';
1085 n = 0;
1086 prev = '\n';
1088 if (i > 1024 - 5) {
1089 rb_str_buf_cat(str, buff, i);
1090 i = 0;
1092 s++;
1094 if (n > 0) {
1095 buff[i++] = '=';
1096 buff[i++] = '\n';
1098 if (i > 0) {
1099 rb_str_buf_cat(str, buff, i);
/*
 * Value of a single hexadecimal digit character: 0-9 for '0'..'9',
 * 10-15 for 'a'..'f' or 'A'..'F', and -1 for any other character.
 */
static inline int
hex2num(char c)
{
    if (c >= '0' && c <= '9') {
	return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
	return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
	return c - 'A' + 10;
    }
    return -1;
}
/*
 * Clamp the repeat count `len` to the number of whole sz-byte items
 * available between `s` and `send`.  `tmp` receives the number of items
 * that were requested but are not available, or 0 when the count came
 * from '*' (`star` set) or nothing had to be clamped.
 *
 * Expects `tmp`, `len`, `s`, `send` and `star` to be in scope at the
 * point of use.
 */
#define PACK_LENGTH_ADJUST_SIZE(sz) do {	\
    tmp = 0;					\
    if (len > (send-s)/(sz)) {			\
	if (!star) {				\
	    tmp = len-(send-s)/(sz);		\
	}					\
	len = (send-s)/(sz);			\
    }						\
} while (0)
/*
 * PACK_LENGTH_ADJUST(type,sz): apply PACK_LENGTH_ADJUST_SIZE using the
 * item size for `type`.  With NATINT_PACK the size comes from
 * NATINT_LEN(type,sz); otherwise it is always sizeof(type) and sz is
 * not used.
 */
1131 #ifdef NATINT_PACK
1132 #define PACK_LENGTH_ADJUST(type,sz) do { \
1133 int t__len = NATINT_LEN(type,(sz)); \
1134 PACK_LENGTH_ADJUST_SIZE(t__len); \
1135 } while (0)
1136 #else
1137 #define PACK_LENGTH_ADJUST(type,sz) \
1138 PACK_LENGTH_ADJUST_SIZE(sizeof(type))
1139 #endif
/* Push one nil onto `ary` for every item counted in `tmp`; `tmp` is consumed by the loop. */
1141 #define PACK_ITEM_ADJUST() while (tmp--) rb_ary_push(ary, Qnil)
1143 static VALUE
1144 infected_str_new(const char *ptr, long len, VALUE str)
1146 VALUE s = rb_str_new(ptr, len);
1148 OBJ_INFECT(s, str);
1149 return s;
1153 * call-seq:
1154 * str.unpack(format) => anArray
1156 * Decodes <i>str</i> (which may contain binary data) according to the
1157 * format string, returning an array of each value extracted. The
1158 * format string consists of a sequence of single-character directives,
1159 * summarized in the table at the end of this entry.
1160 * Each directive may be followed
1161 * by a number, indicating the number of times to repeat with this
1162 * directive. An asterisk (``<code>*</code>'') will use up all
1163 * remaining elements. The directives <code>sSiIlL</code> may each be
1164 * followed by an underscore (``<code>_</code>'') to use the underlying
1165 * platform's native size for the specified type; otherwise, it uses a
1166 * platform-independent consistent size. Spaces are ignored in the
1167 * format string. See also <code>Array#pack</code>.
1169 * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
1170 * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
1171 * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
1172 * "aa".unpack('b8B8') #=> ["10000110", "01100001"]
1173 * "aaa".unpack('h2H2c') #=> ["16", "61", 97]
1174 * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
1175 * "now=20is".unpack('M*') #=> ["now is"]
1176 * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
1178 * This table summarizes the various formats and the Ruby classes
1179 * returned by each.
1181 * Format | Returns | Function
1182 * -------+---------+-----------------------------------------
1183 * A | String | arbitrary binary string with trailing
1184 * | | nulls and ASCII spaces removed
1185 * -------+---------+-----------------------------------------
1186 * a | String | arbitrary binary string
1187 * -------+---------+-----------------------------------------
1188 * B | String | extract bits from each character (msb first)
1189 * -------+---------+-----------------------------------------
1190 * b | String | extract bits from each character (lsb first)
1191 * -------+---------+-----------------------------------------
1192 * C | Fixnum | extract a byte (C char) as an unsigned integer
1193 * -------+---------+-----------------------------------------
1194 * c | Fixnum | extract a byte (C char) as an integer
1195 * -------+---------+-----------------------------------------
1196 * d,D | Float | treat sizeof(double) characters as
1197 * | | a native double
1198 * -------+---------+-----------------------------------------
1199 * E | Float | treat sizeof(double) characters as
1200 * | | a double in little-endian byte order
1201 * -------+---------+-----------------------------------------
1202 * e | Float | treat sizeof(float) characters as
1203 * | | a float in little-endian byte order
1204 * -------+---------+-----------------------------------------
1205 * f,F | Float | treat sizeof(float) characters as
1206 * | | a native float
1207 * -------+---------+-----------------------------------------
1208 * G | Float | treat sizeof(double) characters as
1209 * | | a double in network byte order
1210 * -------+---------+-----------------------------------------
1211 * g | Float | treat sizeof(float) characters as a
1212 * | | float in network byte order
1213 * -------+---------+-----------------------------------------
1214 * H | String | extract hex nibbles from each character
1215 * | | (most significant first)
1216 * -------+---------+-----------------------------------------
1217 * h | String | extract hex nibbles from each character
1218 * | | (least significant first)
1219 * -------+---------+-----------------------------------------
1220 * I | Integer | treat sizeof(int) (modified by _)
1221 * | | successive characters as an unsigned
1222 * | | native integer
1223 * -------+---------+-----------------------------------------
1224 * i | Integer | treat sizeof(int) (modified by _)
1225 * | | successive characters as a signed
1226 * | | native integer
1227 * -------+---------+-----------------------------------------
1228 * L | Integer | treat four (modified by _) successive
1229 * | | characters as an unsigned native
1230 * | | long integer
1231 * -------+---------+-----------------------------------------
1232 * l | Integer | treat four (modified by _) successive
1233 * | | characters as a signed native
1234 * | | long integer
1235 * -------+---------+-----------------------------------------
1236 * M | String | quoted-printable
1237 * -------+---------+-----------------------------------------
1238 * m | String | base64-encoded
1239 * -------+---------+-----------------------------------------
1240 * N | Integer | treat four characters as an unsigned
1241 * | | long in network byte order
1242 * -------+---------+-----------------------------------------
1243 * n | Fixnum | treat two characters as an unsigned
1244 * | | short in network byte order
1245 * -------+---------+-----------------------------------------
1246 * P | String | treat sizeof(char *) characters as a
1247 * | | pointer, and return <i>len</i> characters
1248 * | | from the referenced location
1249 * -------+---------+-----------------------------------------
1250 * p | String | treat sizeof(char *) characters as a
1251 * | | pointer to a null-terminated string
1252 * -------+---------+-----------------------------------------
1253 * Q | Integer | treat 8 characters as an unsigned
1254 * | | quad word (64 bits)
1255 * -------+---------+-----------------------------------------
1256 * q | Integer | treat 8 characters as a signed
1257 * | | quad word (64 bits)
1258 * -------+---------+-----------------------------------------
1259 * S | Fixnum | treat two (different if _ used)
1260 * | | successive characters as an unsigned
1261 * | | short in native byte order
1262 * -------+---------+-----------------------------------------
1263 * s | Fixnum | treat two (different if _ used)
1264 * | | successive characters as a signed short
1265 * | | in native byte order
1266 * -------+---------+-----------------------------------------
1267 * U | Integer | UTF-8 characters as unsigned integers
1268 * -------+---------+-----------------------------------------
1269 * u | String | UU-encoded
1270 * -------+---------+-----------------------------------------
1271 * V | Fixnum | treat four characters as an unsigned
1272 * | | long in little-endian byte order
1273 * -------+---------+-----------------------------------------
1274 * v | Fixnum | treat two characters as an unsigned
1275 * | | short in little-endian byte order
1276 * -------+---------+-----------------------------------------
1277 * w | Integer | BER-compressed integer (see Array.pack)
1278 * -------+---------+-----------------------------------------
1279 * X | --- | skip backward one character
1280 * -------+---------+-----------------------------------------
1281 * x | --- | skip forward one character
1282 * -------+---------+-----------------------------------------
1283 * Z | String | with trailing nulls removed
1284 * | | up to first null with *
1285 * -------+---------+-----------------------------------------
1286 * @ | --- | skip to the offset given by the
1287 * | | length argument
1288 * -------+---------+-----------------------------------------
1291 static VALUE
1292 pack_unpack(VALUE str, VALUE fmt)
1294 static const char hexdigits[] = "0123456789abcdef";
1295 char *s, *send;
1296 char *p, *pend;
1297 VALUE ary;
1298 char type;
1299 long len;
1300 int tmp, star;
1301 #ifdef NATINT_PACK
1302 int natint; /* native integer */
1303 #endif
1304 int block_p = rb_block_given_p();
1305 #define UNPACK_PUSH(item) do {\
1306 VALUE item_val = (item);\
1307 if (block_p) {\
1308 rb_yield(item_val);\
1310 else {\
1311 rb_ary_push(ary, item_val);\
1313 } while (0)
1315 StringValue(str);
1316 StringValue(fmt);
1317 s = RSTRING_PTR(str);
1318 send = s + RSTRING_LEN(str);
1319 p = RSTRING_PTR(fmt);
1320 pend = p + RSTRING_LEN(fmt);
1322 ary = block_p ? Qnil : rb_ary_new();
1323 while (p < pend) {
1324 type = *p++;
1325 #ifdef NATINT_PACK
1326 natint = 0;
1327 #endif
1329 if (ISSPACE(type)) continue;
1330 if (type == '#') {
1331 while ((p < pend) && (*p != '\n')) {
1332 p++;
1334 continue;
1336 star = 0;
1337 if (*p == '_' || *p == '!') {
1338 static const char natstr[] = "sSiIlL";
1340 if (strchr(natstr, type)) {
1341 #ifdef NATINT_PACK
1342 natint = 1;
1343 #endif
1344 p++;
1346 else {
1347 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
1350 if (p >= pend)
1351 len = 1;
1352 else if (*p == '*') {
1353 star = 1;
1354 len = send - s;
1355 p++;
1357 else if (ISDIGIT(*p)) {
1358 errno = 0;
1359 len = STRTOUL(p, (char**)&p, 10);
1360 if (errno) {
1361 rb_raise(rb_eRangeError, "pack length too big");
1364 else {
1365 len = (type != '@');
1368 switch (type) {
1369 case '%':
1370 rb_raise(rb_eArgError, "%% is not supported");
1371 break;
1373 case 'A':
1374 if (len > send - s) len = send - s;
1376 long end = len;
1377 char *t = s + len - 1;
1379 while (t >= s) {
1380 if (*t != ' ' && *t != '\0') break;
1381 t--; len--;
1383 UNPACK_PUSH(infected_str_new(s, len, str));
1384 s += end;
1386 break;
1388 case 'Z':
1390 char *t = s;
1392 if (len > send-s) len = send-s;
1393 while (t < s+len && *t) t++;
1394 UNPACK_PUSH(infected_str_new(s, t-s, str));
1395 if (t < send) t++;
1396 s = star ? t : s+len;
1398 break;
1400 case 'a':
1401 if (len > send - s) len = send - s;
1402 UNPACK_PUSH(infected_str_new(s, len, str));
1403 s += len;
1404 break;
1406 case 'b':
1408 VALUE bitstr;
1409 char *t;
1410 int bits;
1411 long i;
1413 if (p[-1] == '*' || len > (send - s) * 8)
1414 len = (send - s) * 8;
1415 bits = 0;
1416 UNPACK_PUSH(bitstr = rb_str_new(0, len));
1417 t = RSTRING_PTR(bitstr);
1418 for (i=0; i<len; i++) {
1419 if (i & 7) bits >>= 1;
1420 else bits = *s++;
1421 *t++ = (bits & 1) ? '1' : '0';
1424 break;
1426 case 'B':
1428 VALUE bitstr;
1429 char *t;
1430 int bits;
1431 long i;
1433 if (p[-1] == '*' || len > (send - s) * 8)
1434 len = (send - s) * 8;
1435 bits = 0;
1436 UNPACK_PUSH(bitstr = rb_str_new(0, len));
1437 t = RSTRING_PTR(bitstr);
1438 for (i=0; i<len; i++) {
1439 if (i & 7) bits <<= 1;
1440 else bits = *s++;
1441 *t++ = (bits & 128) ? '1' : '0';
1444 break;
1446 case 'h':
1448 VALUE bitstr;
1449 char *t;
1450 int bits;
1451 long i;
1453 if (p[-1] == '*' || len > (send - s) * 2)
1454 len = (send - s) * 2;
1455 bits = 0;
1456 UNPACK_PUSH(bitstr = rb_str_new(0, len));
1457 t = RSTRING_PTR(bitstr);
1458 for (i=0; i<len; i++) {
1459 if (i & 1)
1460 bits >>= 4;
1461 else
1462 bits = *s++;
1463 *t++ = hexdigits[bits & 15];
1466 break;
1468 case 'H':
1470 VALUE bitstr;
1471 char *t;
1472 int bits;
1473 long i;
1475 if (p[-1] == '*' || len > (send - s) * 2)
1476 len = (send - s) * 2;
1477 bits = 0;
1478 UNPACK_PUSH(bitstr = rb_str_new(0, len));
1479 t = RSTRING_PTR(bitstr);
1480 for (i=0; i<len; i++) {
1481 if (i & 1)
1482 bits <<= 4;
1483 else
1484 bits = *s++;
1485 *t++ = hexdigits[(bits >> 4) & 15];
1488 break;
1490 case 'c':
1491 PACK_LENGTH_ADJUST(char,sizeof(char));
1492 while (len-- > 0) {
1493 int c = *s++;
1494 if (c > (char)127) c-=256;
1495 UNPACK_PUSH(INT2FIX(c));
1497 PACK_ITEM_ADJUST();
1498 break;
1500 case 'C':
1501 PACK_LENGTH_ADJUST(unsigned char,sizeof(unsigned char));
1502 while (len-- > 0) {
1503 unsigned char c = *s++;
1504 UNPACK_PUSH(INT2FIX(c));
1506 PACK_ITEM_ADJUST();
1507 break;
1509 case 's':
1510 PACK_LENGTH_ADJUST(short,2);
1511 while (len-- > 0) {
1512 short tmp = 0;
1513 memcpy(OFF16(&tmp), s, NATINT_LEN(short,2));
1514 EXTEND16(tmp);
1515 s += NATINT_LEN(short,2);
1516 UNPACK_PUSH(INT2FIX(tmp));
1518 PACK_ITEM_ADJUST();
1519 break;
1521 case 'S':
1522 PACK_LENGTH_ADJUST(unsigned short,2);
1523 while (len-- > 0) {
1524 unsigned short tmp = 0;
1525 memcpy(OFF16(&tmp), s, NATINT_LEN(unsigned short,2));
1526 s += NATINT_LEN(unsigned short,2);
1527 UNPACK_PUSH(INT2FIX(tmp));
1529 PACK_ITEM_ADJUST();
1530 break;
1532 case 'i':
1533 PACK_LENGTH_ADJUST(int,sizeof(int));
1534 while (len-- > 0) {
1535 int tmp;
1536 memcpy(&tmp, s, sizeof(int));
1537 s += sizeof(int);
1538 UNPACK_PUSH(INT2NUM(tmp));
1540 PACK_ITEM_ADJUST();
1541 break;
1543 case 'I':
1544 PACK_LENGTH_ADJUST(unsigned int,sizeof(unsigned int));
1545 while (len-- > 0) {
1546 unsigned int tmp;
1547 memcpy(&tmp, s, sizeof(unsigned int));
1548 s += sizeof(unsigned int);
1549 UNPACK_PUSH(UINT2NUM(tmp));
1551 PACK_ITEM_ADJUST();
1552 break;
1554 case 'l':
1555 PACK_LENGTH_ADJUST(long,4);
1556 while (len-- > 0) {
1557 long tmp = 0;
1558 memcpy(OFF32(&tmp), s, NATINT_LEN(long,4));
1559 EXTEND32(tmp);
1560 s += NATINT_LEN(long,4);
1561 UNPACK_PUSH(LONG2NUM(tmp));
1563 PACK_ITEM_ADJUST();
1564 break;
1565 case 'L':
1566 PACK_LENGTH_ADJUST(unsigned long,4);
1567 while (len-- > 0) {
1568 unsigned long tmp = 0;
1569 memcpy(OFF32(&tmp), s, NATINT_LEN(unsigned long,4));
1570 s += NATINT_LEN(unsigned long,4);
1571 UNPACK_PUSH(ULONG2NUM(tmp));
1573 PACK_ITEM_ADJUST();
1574 break;
1576 case 'q':
1577 PACK_LENGTH_ADJUST_SIZE(QUAD_SIZE);
1578 while (len-- > 0) {
1579 char *tmp = (char*)s;
1580 s += QUAD_SIZE;
1581 UNPACK_PUSH(rb_quad_unpack(tmp, 1));
1583 PACK_ITEM_ADJUST();
1584 break;
1585 case 'Q':
1586 PACK_LENGTH_ADJUST_SIZE(QUAD_SIZE);
1587 while (len-- > 0) {
1588 char *tmp = (char*)s;
1589 s += QUAD_SIZE;
1590 UNPACK_PUSH(rb_quad_unpack(tmp, 0));
1592 break;
1594 case 'n':
1595 PACK_LENGTH_ADJUST(unsigned short,2);
1596 while (len-- > 0) {
1597 unsigned short tmp = 0;
1598 memcpy(OFF16B(&tmp), s, NATINT_LEN(unsigned short,2));
1599 s += NATINT_LEN(unsigned short,2);
1600 UNPACK_PUSH(UINT2NUM(ntohs(tmp)));
1602 PACK_ITEM_ADJUST();
1603 break;
1605 case 'N':
1606 PACK_LENGTH_ADJUST(unsigned long,4);
1607 while (len-- > 0) {
1608 unsigned long tmp = 0;
1609 memcpy(OFF32B(&tmp), s, NATINT_LEN(unsigned long,4));
1610 s += NATINT_LEN(unsigned long,4);
1611 UNPACK_PUSH(ULONG2NUM(ntohl(tmp)));
1613 PACK_ITEM_ADJUST();
1614 break;
1616 case 'v':
1617 PACK_LENGTH_ADJUST(unsigned short,2);
1618 while (len-- > 0) {
1619 unsigned short tmp = 0;
1620 memcpy(&tmp, s, NATINT_LEN(unsigned short,2));
1621 s += NATINT_LEN(unsigned short,2);
1622 UNPACK_PUSH(UINT2NUM(vtohs(tmp)));
1624 PACK_ITEM_ADJUST();
1625 break;
1627 case 'V':
1628 PACK_LENGTH_ADJUST(unsigned long,4);
1629 while (len-- > 0) {
1630 unsigned long tmp = 0;
1631 memcpy(&tmp, s, NATINT_LEN(long,4));
1632 s += NATINT_LEN(long,4);
1633 UNPACK_PUSH(ULONG2NUM(vtohl(tmp)));
1635 PACK_ITEM_ADJUST();
1636 break;
1638 case 'f':
1639 case 'F':
1640 PACK_LENGTH_ADJUST(float,sizeof(float));
1641 while (len-- > 0) {
1642 float tmp;
1643 memcpy(&tmp, s, sizeof(float));
1644 s += sizeof(float);
1645 UNPACK_PUSH(DOUBLE2NUM((double)tmp));
1647 PACK_ITEM_ADJUST();
1648 break;
1650 case 'e':
1651 PACK_LENGTH_ADJUST(float,sizeof(float));
1652 while (len-- > 0) {
1653 float tmp;
1654 FLOAT_CONVWITH(ftmp);
1656 memcpy(&tmp, s, sizeof(float));
1657 s += sizeof(float);
1658 tmp = VTOHF(tmp,ftmp);
1659 UNPACK_PUSH(DOUBLE2NUM((double)tmp));
1661 PACK_ITEM_ADJUST();
1662 break;
1664 case 'E':
1665 PACK_LENGTH_ADJUST(double,sizeof(double));
1666 while (len-- > 0) {
1667 double tmp;
1668 DOUBLE_CONVWITH(dtmp);
1670 memcpy(&tmp, s, sizeof(double));
1671 s += sizeof(double);
1672 tmp = VTOHD(tmp,dtmp);
1673 UNPACK_PUSH(DOUBLE2NUM(tmp));
1675 PACK_ITEM_ADJUST();
1676 break;
1678 case 'D':
1679 case 'd':
1680 PACK_LENGTH_ADJUST(double,sizeof(double));
1681 while (len-- > 0) {
1682 double tmp;
1683 memcpy(&tmp, s, sizeof(double));
1684 s += sizeof(double);
1685 UNPACK_PUSH(DOUBLE2NUM(tmp));
1687 PACK_ITEM_ADJUST();
1688 break;
1690 case 'g':
1691 PACK_LENGTH_ADJUST(float,sizeof(float));
1692 while (len-- > 0) {
1693 float tmp;
1694 FLOAT_CONVWITH(ftmp;)
1696 memcpy(&tmp, s, sizeof(float));
1697 s += sizeof(float);
1698 tmp = NTOHF(tmp,ftmp);
1699 UNPACK_PUSH(DOUBLE2NUM((double)tmp));
1701 PACK_ITEM_ADJUST();
1702 break;
1704 case 'G':
1705 PACK_LENGTH_ADJUST(double,sizeof(double));
1706 while (len-- > 0) {
1707 double tmp;
1708 DOUBLE_CONVWITH(dtmp);
1710 memcpy(&tmp, s, sizeof(double));
1711 s += sizeof(double);
1712 tmp = NTOHD(tmp,dtmp);
1713 UNPACK_PUSH(DOUBLE2NUM(tmp));
1715 PACK_ITEM_ADJUST();
1716 break;
1718 case 'U':
1719 if (len > send - s) len = send - s;
1720 while (len > 0 && s < send) {
1721 long alen = send - s;
1722 unsigned long l;
1724 l = utf8_to_uv(s, &alen);
1725 s += alen; len--;
1726 UNPACK_PUSH(ULONG2NUM(l));
1728 break;
1730 case 'u':
1732 VALUE buf = infected_str_new(0, (send - s)*3/4, str);
1733 char *ptr = RSTRING_PTR(buf);
1734 long total = 0;
1736 while (s < send && *s > ' ' && *s < 'a') {
1737 long a,b,c,d;
1738 char hunk[4];
1740 hunk[3] = '\0';
1741 len = (*s++ - ' ') & 077;
1742 total += len;
1743 if (total > RSTRING_LEN(buf)) {
1744 len -= total - RSTRING_LEN(buf);
1745 total = RSTRING_LEN(buf);
1748 while (len > 0) {
1749 long mlen = len > 3 ? 3 : len;
1751 if (s < send && *s >= ' ')
1752 a = (*s++ - ' ') & 077;
1753 else
1754 a = 0;
1755 if (s < send && *s >= ' ')
1756 b = (*s++ - ' ') & 077;
1757 else
1758 b = 0;
1759 if (s < send && *s >= ' ')
1760 c = (*s++ - ' ') & 077;
1761 else
1762 c = 0;
1763 if (s < send && *s >= ' ')
1764 d = (*s++ - ' ') & 077;
1765 else
1766 d = 0;
1767 hunk[0] = a << 2 | b >> 4;
1768 hunk[1] = b << 4 | c >> 2;
1769 hunk[2] = c << 6 | d;
1770 memcpy(ptr, hunk, mlen);
1771 ptr += mlen;
1772 len -= mlen;
1774 if (*s == '\r') s++;
1775 if (*s == '\n') s++;
1776 else if (s < send && (s+1 == send || s[1] == '\n'))
1777 s += 2; /* possible checksum byte */
1780 rb_str_set_len(buf, total);
1781 UNPACK_PUSH(buf);
1783 break;
1785 case 'm':
1787 VALUE buf = infected_str_new(0, (send - s)*3/4, str);
1788 char *ptr = RSTRING_PTR(buf);
1789 int a = -1,b = -1,c = 0,d;
1790 static signed char b64_xtable[256];
1792 if (b64_xtable['/'] <= 0) {
1793 int i;
1795 for (i = 0; i < 256; i++) {
1796 b64_xtable[i] = -1;
1798 for (i = 0; i < 64; i++) {
1799 b64_xtable[(unsigned char)b64_table[i]] = i;
1802 while (s < send) {
1803 a = b = c = d = -1;
1804 while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1805 if (s >= send) break;
1806 s++;
1807 while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1808 if (s >= send) break;
1809 s++;
1810 while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1811 if (*s == '=' || s >= send) break;
1812 s++;
1813 while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1814 if (*s == '=' || s >= send) break;
1815 s++;
1816 *ptr++ = a << 2 | b >> 4;
1817 *ptr++ = b << 4 | c >> 2;
1818 *ptr++ = c << 6 | d;
1820 if (a != -1 && b != -1) {
1821 if (c == -1 && *s == '=')
1822 *ptr++ = a << 2 | b >> 4;
1823 else if (c != -1 && *s == '=') {
1824 *ptr++ = a << 2 | b >> 4;
1825 *ptr++ = b << 4 | c >> 2;
1828 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1829 UNPACK_PUSH(buf);
1831 break;
1833 case 'M':
1835 VALUE buf = infected_str_new(0, send - s, str);
1836 char *ptr = RSTRING_PTR(buf);
1837 int c1, c2;
1839 while (s < send) {
1840 if (*s == '=') {
1841 if (++s == send) break;
1842 if (s+1 < send && *s == '\r' && *(s+1) == '\n')
1843 s++;
1844 if (*s != '\n') {
1845 if ((c1 = hex2num(*s)) == -1) break;
1846 if (++s == send) break;
1847 if ((c2 = hex2num(*s)) == -1) break;
1848 *ptr++ = c1 << 4 | c2;
1851 else {
1852 *ptr++ = *s;
1854 s++;
1856 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1857 UNPACK_PUSH(buf);
1859 break;
1861 case '@':
1862 if (len > RSTRING_LEN(str))
1863 rb_raise(rb_eArgError, "@ outside of string");
1864 s = RSTRING_PTR(str) + len;
1865 break;
1867 case 'X':
1868 if (len > s - RSTRING_PTR(str))
1869 rb_raise(rb_eArgError, "X outside of string");
1870 s -= len;
1871 break;
1873 case 'x':
1874 if (len > send - s)
1875 rb_raise(rb_eArgError, "x outside of string");
1876 s += len;
1877 break;
1879 case 'P':
1880 if (sizeof(char *) <= send - s) {
1881 VALUE tmp = Qnil;
1882 char *t;
1884 memcpy(&t, s, sizeof(char *));
1885 s += sizeof(char *);
1887 if (t) {
1888 VALUE a, *p, *pend;
1890 if (!(a = rb_str_associated(str))) {
1891 rb_raise(rb_eArgError, "no associated pointer");
1893 p = RARRAY_PTR(a);
1894 pend = p + RARRAY_LEN(a);
1895 while (p < pend) {
1896 if (TYPE(*p) == T_STRING && RSTRING_PTR(*p) == t) {
1897 if (len < RSTRING_LEN(*p)) {
1898 tmp = rb_tainted_str_new(t, len);
1899 rb_str_associate(tmp, a);
1901 else {
1902 tmp = *p;
1904 break;
1906 p++;
1908 if (p == pend) {
1909 rb_raise(rb_eArgError, "non associated pointer");
1912 UNPACK_PUSH(tmp);
1914 break;
1916 case 'p':
1917 if (len > (send - s) / sizeof(char *))
1918 len = (send - s) / sizeof(char *);
1919 while (len-- > 0) {
1920 if (send - s < sizeof(char *))
1921 break;
1922 else {
1923 VALUE tmp = Qnil;
1924 char *t;
1926 memcpy(&t, s, sizeof(char *));
1927 s += sizeof(char *);
1929 if (t) {
1930 VALUE a, *p, *pend;
1932 if (!(a = rb_str_associated(str))) {
1933 rb_raise(rb_eArgError, "no associated pointer");
1935 p = RARRAY_PTR(a);
1936 pend = p + RARRAY_LEN(a);
1937 while (p < pend) {
1938 if (TYPE(*p) == T_STRING && RSTRING_PTR(*p) == t) {
1939 tmp = *p;
1940 break;
1942 p++;
1944 if (p == pend) {
1945 rb_raise(rb_eArgError, "non associated pointer");
1948 UNPACK_PUSH(tmp);
1951 break;
1953 case 'w':
1955 unsigned long ul = 0;
1956 unsigned long ulmask = 0xfeUL << ((sizeof(unsigned long) - 1) * 8);
1958 while (len > 0 && s < send) {
1959 ul <<= 7;
1960 ul |= (*s & 0x7f);
1961 if (!(*s++ & 0x80)) {
1962 UNPACK_PUSH(ULONG2NUM(ul));
1963 len--;
1964 ul = 0;
1966 else if (ul & ulmask) {
1967 VALUE big = rb_uint2big(ul);
1968 VALUE big128 = rb_uint2big(128);
1969 while (s < send) {
1970 big = rb_big_mul(big, big128);
1971 big = rb_big_plus(big, rb_uint2big(*s & 0x7f));
1972 if (!(*s++ & 0x80)) {
1973 UNPACK_PUSH(big);
1974 len--;
1975 ul = 0;
1976 break;
1982 break;
1984 default:
1985 break;
1989 return ary;
1992 #define BYTEWIDTH 8
1995 rb_uv_to_utf8(char buf[6], unsigned long uv)
1997 if (uv <= 0x7f) {
1998 buf[0] = (char)uv;
1999 return 1;
2001 if (uv <= 0x7ff) {
2002 buf[0] = ((uv>>6)&0xff)|0xc0;
2003 buf[1] = (uv&0x3f)|0x80;
2004 return 2;
2006 if (uv <= 0xffff) {
2007 buf[0] = ((uv>>12)&0xff)|0xe0;
2008 buf[1] = ((uv>>6)&0x3f)|0x80;
2009 buf[2] = (uv&0x3f)|0x80;
2010 return 3;
2012 if (uv <= 0x1fffff) {
2013 buf[0] = ((uv>>18)&0xff)|0xf0;
2014 buf[1] = ((uv>>12)&0x3f)|0x80;
2015 buf[2] = ((uv>>6)&0x3f)|0x80;
2016 buf[3] = (uv&0x3f)|0x80;
2017 return 4;
2019 if (uv <= 0x3ffffff) {
2020 buf[0] = ((uv>>24)&0xff)|0xf8;
2021 buf[1] = ((uv>>18)&0x3f)|0x80;
2022 buf[2] = ((uv>>12)&0x3f)|0x80;
2023 buf[3] = ((uv>>6)&0x3f)|0x80;
2024 buf[4] = (uv&0x3f)|0x80;
2025 return 5;
2027 if (uv <= 0x7fffffff) {
2028 buf[0] = ((uv>>30)&0xff)|0xfc;
2029 buf[1] = ((uv>>24)&0x3f)|0x80;
2030 buf[2] = ((uv>>18)&0x3f)|0x80;
2031 buf[3] = ((uv>>12)&0x3f)|0x80;
2032 buf[4] = ((uv>>6)&0x3f)|0x80;
2033 buf[5] = (uv&0x3f)|0x80;
2034 return 6;
2036 rb_raise(rb_eRangeError, "pack(U): value out of range");
2039 static const unsigned long utf8_limits[] = {
2040 0x0, /* 1 */
2041 0x80, /* 2 */
2042 0x800, /* 3 */
2043 0x10000, /* 4 */
2044 0x200000, /* 5 */
2045 0x4000000, /* 6 */
2046 0x80000000, /* 7 */
2049 static unsigned long
2050 utf8_to_uv(const char *p, long *lenp)
2052 int c = *p++ & 0xff;
2053 unsigned long uv = c;
2054 long n;
2056 if (!(uv & 0x80)) {
2057 *lenp = 1;
2058 return uv;
2060 if (!(uv & 0x40)) {
2061 *lenp = 1;
2062 rb_raise(rb_eArgError, "malformed UTF-8 character");
2065 if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
2066 else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
2067 else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
2068 else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
2069 else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
2070 else {
2071 *lenp = 1;
2072 rb_raise(rb_eArgError, "malformed UTF-8 character");
2074 if (n > *lenp) {
2075 rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
2076 n, *lenp);
2078 *lenp = n--;
2079 if (n != 0) {
2080 while (n--) {
2081 c = *p++ & 0xff;
2082 if ((c & 0xc0) != 0x80) {
2083 *lenp -= n + 1;
2084 rb_raise(rb_eArgError, "malformed UTF-8 character");
2086 else {
2087 c &= 0x3f;
2088 uv = uv << 6 | c;
2092 n = *lenp - 1;
2093 if (uv < utf8_limits[n]) {
2094 rb_raise(rb_eArgError, "redundant UTF-8 sequence");
2096 return uv;
2099 void
2100 Init_pack(void)
2102 rb_define_method(rb_cArray, "pack", pack_pack, 1);
2103 rb_define_method(rb_cString, "unpack", pack_unpack, 1);