b8aab3a058b9097adb758038bb86c5bd1d1116cc
[guile-r6rs-libs.git] / src / bytevector.c
blobb8aab3a058b9097adb758038bb86c5bd1d1116cc
1 /* Guile-R6RS-Libs --- Implementation of R6RS standard libraries.
2 Copyright (C) 2007, 2008, 2009 Ludovic Courtès <ludo@gnu.org>
4 Guile-R6RS-Libs is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 Guile-R6RS-Libs is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with Guile-R6RS-Libs; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
22 #include <alloca.h>
24 #include <libguile.h>
25 #include <gmp.h>
27 #include "bytevector.h"
28 #include "ieee-754.h"
29 #include "uniconv.h"
30 #include "striconveh.h"
31 #include "utils.h"
33 #include <byteswap.h>
35 #ifdef HAVE_LIMITS_H
36 # include <limits.h>
37 #else
38 /* Assuming 32-bit longs. */
39 # define ULONG_MAX 4294967295UL
40 #endif
42 #include <string.h>
/* Utilities.  */

/* Building blocks for the accessor templates below, which are instantiated
   for a given integer width (8, 16 or 32 bits) and a given signedness
   (`signed' or `unsigned').  */

/* C type holding an integer of the given width and signedness.  */
#define INT8_T_signed      scm_t_int8
#define INT8_T_unsigned    scm_t_uint8
#define INT16_T_signed     scm_t_int16
#define INT16_T_unsigned   scm_t_uint16
#define INT32_T_signed     scm_t_int32
#define INT32_T_unsigned   scm_t_uint32

/* Range predicates: true when _X is representable on the given width.  */
#define is_signed_int8(_x)      (((_x) >= -128L) && ((_x) <= 127L))
#define is_unsigned_int8(_x)    ((_x) <= 255UL)
#define is_signed_int16(_x)     (((_x) >= -32768L) && ((_x) <= 32767L))
#define is_unsigned_int16(_x)   ((_x) <= 65535UL)
#define is_signed_int32(_x)     (((_x) >= -2147483648L) && ((_x) <= 2147483647L))
#define is_unsigned_int32(_x)   ((_x) <= 4294967295UL)

/* Boolean flag associated with each signedness.  */
#define SIGNEDNESS_signed     1
#define SIGNEDNESS_unsigned   0

/* Selectors that paste a width and/or a signedness into one of the names
   defined above (or into the matching `bswap_N' routine).  */
#define INT_TYPE(_size, _sign)      INT ## _size ## _T_ ## _sign
#define INT_SWAP(_size)             bswap_ ## _size
#define INT_VALID_P(_size, _sign)   is_ ## _sign ## _int ## _size
#define SIGNEDNESS(_sign)           SIGNEDNESS_ ## _sign
/* Common prologue of the fixed-size accessors.  Declares C_LEN, C_INDEX and
   C_BV, validates BV (argument 1) and converts INDEX, then signals an
   out-of-range error unless the (_LEN / 8) octets starting at C_INDEX all
   lie within BV.  Expects `bv', `index' and FUNC_NAME to be in scope.  */
#define INTEGER_ACCESSOR_PROLOGUE(_len, _sign)                          \
  unsigned c_len, c_index;                                              \
  _sign char *c_bv;                                                     \
                                                                        \
  SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv);                                 \
  c_index = scm_to_uint (index);                                        \
                                                                        \
  c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);                              \
  c_bv = (_sign char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv);              \
                                                                        \
  /* Compare C_INDEX with `c_len - size' instead of computing           \
     `c_index + size': the latter wraps around for huge indices and     \
     would let an out-of-bounds access through.  */                     \
  if (SCM_UNLIKELY ((c_len < ((_len) >> 3UL))                           \
                    || (c_index > c_len - ((_len) >> 3UL))))            \
    scm_out_of_range (FUNC_NAME, index);
/* Body of a fixed-size (8, 16 or 32-bit) integer reader taking an explicit
   ENDIANNESS argument: the octets are copied out of the bytevector,
   byte-swapped when ENDIANNESS is not the native one, and returned as a
   fixnum.  */
#define INTEGER_REF(_len, _sign)                                        \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
  SCM_VALIDATE_SYMBOL (3, endianness);                                  \
                                                                        \
  {                                                                     \
    INT_TYPE (_len, _sign) c_value;                                     \
                                                                        \
    /* Go through `memcpy' since C_INDEX may be unaligned.  */          \
    memcpy (&c_value, &c_bv[c_index], (_len) / 8);                      \
    if (!scm_is_eq (endianness, native_endianness))                     \
      c_value = INT_SWAP (_len) (c_value);                              \
                                                                        \
    return SCM_I_MAKINUM (c_value);                                     \
  }
/* Body of a fixed-size integer reader that always uses the native
   endianness: the octets are copied out as is and returned as a fixnum.  */
#define INTEGER_NATIVE_REF(_len, _sign)                                 \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
                                                                        \
  {                                                                     \
    INT_TYPE (_len, _sign) c_value;                                     \
                                                                        \
    memcpy (&c_value, &c_bv[c_index], (_len) / 8);                      \
    return SCM_I_MAKINUM (c_value);                                     \
  }
/* Body of a fixed-size (8, 16 or 32-bit) integer writer taking an explicit
   ENDIANNESS argument.  VALUE (argument 3) must be a fixnum within the range
   of the target type; it is byte-swapped when ENDIANNESS (argument 4) is not
   the native one and then copied into the bytevector.  */
#define INTEGER_SET(_len, _sign)                                        \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
  /* ENDIANNESS is the fourth argument of the `-set!' procedures.  */   \
  SCM_VALIDATE_SYMBOL (4, endianness);                                  \
                                                                        \
  {                                                                     \
    _sign long c_value;                                                 \
    INT_TYPE (_len, _sign) c_value_short;                               \
                                                                        \
    if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                            \
      scm_wrong_type_arg (FUNC_NAME, 3, value);                         \
                                                                        \
    c_value = SCM_I_INUM (value);                                       \
    if (SCM_UNLIKELY (!INT_VALID_P (_len, _sign) (c_value)))            \
      scm_out_of_range (FUNC_NAME, value);                              \
                                                                        \
    c_value_short = (INT_TYPE (_len, _sign)) c_value;                   \
    if (!scm_is_eq (endianness, native_endianness))                     \
      c_value_short = INT_SWAP (_len) (c_value_short);                  \
                                                                        \
    memcpy (&c_bv[c_index], &c_value_short, (_len) / 8);                \
  }                                                                     \
                                                                        \
  return SCM_UNSPECIFIED;
/* Body of a fixed-size integer writer that always uses the native
   endianness.  VALUE (argument 3) must be a fixnum within the range of the
   target type; it is narrowed and copied into the bytevector as is.  */
#define INTEGER_NATIVE_SET(_len, _sign)                                 \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
                                                                        \
  {                                                                     \
    _sign long c_wide;                                                  \
    INT_TYPE (_len, _sign) c_narrow;                                    \
                                                                        \
    if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                            \
      scm_wrong_type_arg (FUNC_NAME, 3, value);                         \
                                                                        \
    c_wide = SCM_I_INUM (value);                                        \
    if (SCM_UNLIKELY (!INT_VALID_P (_len, _sign) (c_wide)))             \
      scm_out_of_range (FUNC_NAME, value);                              \
                                                                        \
    c_narrow = (INT_TYPE (_len, _sign)) c_wide;                         \
    memcpy (&c_bv[c_index], &c_narrow, (_len) / 8);                     \
  }                                                                     \
                                                                        \
  return SCM_UNSPECIFIED;
168 /* Bytevector type. */
170 SCM_GLOBAL_SMOB (scm_tc16_r6rs_bytevector, "r6rs-bytevector", 0);
172 #define SCM_R6RS_BYTEVECTOR_SET_LENGTH(_bv, _len) \
173 SCM_SET_SMOB_DATA ((_bv), (scm_t_bits) (_len))
174 #define SCM_R6RS_BYTEVECTOR_SET_CONTENTS(_bv, _buf) \
175 SCM_SET_SMOB_DATA_2 ((_bv), (scm_t_bits) (_buf))
177 /* The empty bytevector. */
178 SCM scm_r6rs_null_bytevector = SCM_UNSPECIFIED;
181 static inline SCM
182 make_bytevector_from_buffer (unsigned len, signed char *contents)
184 /* Assuming LEN > SCM_R6RS_BYTEVECTOR_INLINE_THRESHOLD. */
185 SCM_RETURN_NEWSMOB2 (scm_tc16_r6rs_bytevector, len, contents);
188 static inline SCM
189 make_bytevector (unsigned len)
191 SCM bv;
193 if (SCM_UNLIKELY (len == 0))
194 bv = scm_r6rs_null_bytevector;
195 else
197 signed char *contents = NULL;
199 if (!SCM_R6RS_BYTEVECTOR_INLINEABLE_SIZE_P (len))
200 contents = (signed char *) scm_gc_malloc (len, SCM_GC_BYTEVECTOR);
202 bv = make_bytevector_from_buffer (len, contents);
205 return bv;
208 /* Return a new bytevector of size LEN octets. */
210 scm_r6rs_c_make_bytevector (unsigned len)
212 return (make_bytevector (len));
215 /* Return a bytevector of size LEN made up of CONTENTS. The area pointed to
216 by CONTENTS must have been allocated using `scm_gc_malloc ()'. */
218 scm_r6rs_c_take_bytevector (signed char *contents, unsigned len)
220 SCM bv;
222 if (SCM_UNLIKELY (SCM_R6RS_BYTEVECTOR_INLINEABLE_SIZE_P (len)))
224 /* Copy CONTENTS into an "in-line" buffer, then free CONTENTS. */
225 signed char *c_bv;
227 bv = make_bytevector (len);
228 c_bv = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
229 memcpy (c_bv, contents, len);
230 scm_gc_free (contents, len, SCM_GC_BYTEVECTOR);
232 else
233 bv = make_bytevector_from_buffer (len, contents);
235 return bv;
238 /* Shrink BV to C_NEW_LEN (which is assumed to be smaller than its current
239 size) and return BV. */
241 scm_r6rs_i_shrink_bytevector (SCM bv, unsigned c_new_len)
243 if (!SCM_R6RS_BYTEVECTOR_INLINE_P (bv))
245 unsigned c_len;
246 signed char *c_bv, *c_new_bv;
248 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);
249 c_bv = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
251 SCM_R6RS_BYTEVECTOR_SET_LENGTH (bv, c_new_len);
253 if (SCM_R6RS_BYTEVECTOR_INLINEABLE_SIZE_P (c_new_len))
255 /* Copy to the in-line buffer and free the current buffer. */
256 c_new_bv = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
257 memcpy (c_new_bv, c_bv, c_new_len);
258 scm_gc_free (c_bv, c_len, SCM_GC_BYTEVECTOR);
260 else
262 /* Resize the existing buffer. */
263 c_new_bv = scm_gc_realloc (c_bv, c_len, c_new_len,
264 SCM_GC_BYTEVECTOR);
265 SCM_R6RS_BYTEVECTOR_SET_CONTENTS (bv, c_new_bv);
269 return bv;
272 SCM_SMOB_PRINT (scm_tc16_r6rs_bytevector, print_bytevector,
273 bv, port, pstate)
275 unsigned c_len, i;
276 unsigned char *c_bv;
278 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);
279 c_bv = (unsigned char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
281 scm_puts ("#vu8(", port);
282 for (i = 0; i < c_len; i++)
284 if (i > 0)
285 scm_putc (' ', port);
287 scm_uintprint (c_bv[i], 10, port);
290 scm_putc (')', port);
292 /* Make GCC think we use it. */
293 scm_remember_upto_here ((SCM) pstate);
295 return 1;
298 SCM_SMOB_FREE (scm_tc16_r6rs_bytevector, free_bytevector, bv)
301 if (!SCM_R6RS_BYTEVECTOR_INLINE_P (bv))
303 unsigned c_len;
304 signed char *c_bv;
306 c_bv = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
307 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);
309 scm_gc_free (c_bv, c_len, SCM_GC_BYTEVECTOR);
312 return 0;
317 /* General operations. */
319 SCM_SYMBOL (scm_sym_big, "big");
320 SCM_SYMBOL (scm_sym_little, "little");
322 SCM scm_r6rs_endianness_big, scm_r6rs_endianness_little;
324 /* Host endianness (a symbol). */
325 static SCM native_endianness = SCM_UNSPECIFIED;
/* Byte-swapping.  */

/* Fallback 24-bit swap: exchange the low and high octets of _X and keep the
   middle one in place.  */
#ifndef bswap_24
# define bswap_24(_x)                   \
  ((((_x) & 0xff0000) >> 16) |          \
   (((_x) & 0x00ff00))       |          \
   (((_x) & 0x0000ff) << 16))
#endif
336 SCM_DEFINE (scm_r6rs_native_endianness, "native-endianness", 0, 0, 0,
337 (void),
338 "Return a symbol denoting the machine's native endianness.")
340 return native_endianness;
343 SCM_DEFINE (scm_r6rs_bytevector_p, "bytevector?", 1, 0, 0,
344 (SCM obj),
345 "Return true if @var{obj} is a bytevector.")
347 return (scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_r6rs_bytevector,
348 obj)));
351 SCM_DEFINE (scm_r6rs_make_bytevector, "make-bytevector", 1, 1, 0,
352 (SCM len, SCM fill),
353 "Return a newly allocated bytevector of @var{len} bytes, "
354 "optionally filled with @var{fill}.")
355 #define FUNC_NAME s_scm_r6rs_make_bytevector
357 SCM bv;
358 unsigned c_len;
359 signed char c_fill = '\0';
361 SCM_VALIDATE_UINT_COPY (1, len, c_len);
362 if (fill != SCM_UNDEFINED)
364 int value;
366 value = scm_to_int (fill);
367 if (SCM_UNLIKELY ((value < -128) || (value > 255)))
368 scm_out_of_range (FUNC_NAME, fill);
369 c_fill = (signed char) value;
372 bv = make_bytevector (c_len);
373 if (fill != SCM_UNDEFINED)
375 unsigned i;
376 signed char *contents;
378 contents = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
379 for (i = 0; i < c_len; i++)
380 contents[i] = c_fill;
383 return bv;
385 #undef FUNC_NAME
387 SCM_DEFINE (scm_r6rs_bytevector_length, "bytevector-length", 1, 0, 0,
388 (SCM bv),
389 "Return the length (in bytes) of @var{bv}.")
390 #define FUNC_NAME s_scm_r6rs_bytevector_length
392 SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv);
394 return (scm_from_uint (SCM_R6RS_BYTEVECTOR_LENGTH (bv)));
396 #undef FUNC_NAME
398 SCM_DEFINE (scm_r6rs_bytevector_eq_p, "bytevector=?", 2, 0, 0,
399 (SCM bv1, SCM bv2),
400 "Return is @var{bv1} equals to @var{bv2}---i.e., if they "
401 "have the same length and contents.")
402 #define FUNC_NAME s_scm_r6rs_bytevector_eq_p
404 SCM result = SCM_BOOL_F;
405 unsigned c_len1, c_len2;
407 SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv1);
408 SCM_VALIDATE_R6RS_BYTEVECTOR (2, bv2);
410 c_len1 = SCM_R6RS_BYTEVECTOR_LENGTH (bv1);
411 c_len2 = SCM_R6RS_BYTEVECTOR_LENGTH (bv2);
413 if (c_len1 == c_len2)
415 signed char *c_bv1, *c_bv2;
417 c_bv1 = SCM_R6RS_BYTEVECTOR_CONTENTS (bv1);
418 c_bv2 = SCM_R6RS_BYTEVECTOR_CONTENTS (bv2);
420 result = scm_from_bool (!memcmp (c_bv1, c_bv2, c_len1));
423 return result;
425 #undef FUNC_NAME
427 SCM_DEFINE (scm_r6rs_bytevector_fill_x, "bytevector-fill!", 2, 0, 0,
428 (SCM bv, SCM fill),
429 "Fill bytevector @var{bv} with @var{fill}, a byte.")
430 #define FUNC_NAME s_scm_r6rs_bytevector_fill_x
432 unsigned c_len, i;
433 signed char *c_bv, c_fill;
435 SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv);
436 c_fill = scm_to_int8 (fill);
438 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);
439 c_bv = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
441 for (i = 0; i < c_len; i++)
442 c_bv[i] = c_fill;
444 return SCM_UNSPECIFIED;
446 #undef FUNC_NAME
448 SCM_DEFINE (scm_r6rs_bytevector_copy_x, "bytevector-copy!", 5, 0, 0,
449 (SCM source, SCM source_start, SCM target, SCM target_start,
450 SCM len),
451 "Copy @var{len} bytes from @var{source} into @var{target}, "
452 "starting reading from @var{source_start} (a positive index "
453 "within @var{source}) and start writing at "
454 "@var{target_start}.")
455 #define FUNC_NAME s_scm_r6rs_bytevector_copy_x
457 unsigned c_len, c_source_len, c_target_len;
458 unsigned c_source_start, c_target_start;
459 signed char *c_source, *c_target;
461 SCM_VALIDATE_R6RS_BYTEVECTOR (1, source);
462 SCM_VALIDATE_R6RS_BYTEVECTOR (3, target);
464 c_len = scm_to_uint (len);
465 c_source_start = scm_to_uint (source_start);
466 c_target_start = scm_to_uint (target_start);
468 c_source = SCM_R6RS_BYTEVECTOR_CONTENTS (source);
469 c_target = SCM_R6RS_BYTEVECTOR_CONTENTS (target);
470 c_source_len = SCM_R6RS_BYTEVECTOR_LENGTH (source);
471 c_target_len = SCM_R6RS_BYTEVECTOR_LENGTH (target);
473 if (SCM_UNLIKELY (c_source_start + c_len > c_source_len))
474 scm_out_of_range (FUNC_NAME, source_start);
475 if (SCM_UNLIKELY (c_target_start + c_len > c_target_len))
476 scm_out_of_range (FUNC_NAME, target_start);
478 memcpy (c_target + c_target_start,
479 c_source + c_source_start,
480 c_len);
482 return SCM_UNSPECIFIED;
484 #undef FUNC_NAME
486 SCM_DEFINE (scm_r6rs_bytevector_copy, "bytevector-copy", 1, 0, 0,
487 (SCM bv),
488 "Return a newly allocated copy of @var{bv}.")
489 #define FUNC_NAME s_scm_r6rs_bytevector_copy
491 SCM copy;
492 unsigned c_len;
493 signed char *c_bv, *c_copy;
495 SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv);
497 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);
498 c_bv = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
500 copy = make_bytevector (c_len);
501 c_copy = SCM_R6RS_BYTEVECTOR_CONTENTS (copy);
502 memcpy (c_copy, c_bv, c_len);
504 return copy;
506 #undef FUNC_NAME
509 /* Operations on bytes and octets. */
511 SCM_DEFINE (scm_r6rs_bytevector_u8_ref, "bytevector-u8-ref", 2, 0, 0,
512 (SCM bv, SCM index),
513 "Return the octet located at @var{index} in @var{bv}.")
514 #define FUNC_NAME s_scm_r6rs_bytevector_u8_ref
516 INTEGER_NATIVE_REF (8, unsigned);
518 #undef FUNC_NAME
520 SCM_DEFINE (scm_r6rs_bytevector_s8_ref, "bytevector-s8-ref", 2, 0, 0,
521 (SCM bv, SCM index),
522 "Return the byte located at @var{index} in @var{bv}.")
523 #define FUNC_NAME s_scm_r6rs_bytevector_u8_ref
525 INTEGER_NATIVE_REF (8, signed);
527 #undef FUNC_NAME
529 SCM_DEFINE (scm_r6rs_bytevector_u8_set_x, "bytevector-u8-set!", 3, 0, 0,
530 (SCM bv, SCM index, SCM value),
531 "Return the octet located at @var{index} in @var{bv}.")
532 #define FUNC_NAME s_scm_r6rs_bytevector_u8_set_x
534 INTEGER_NATIVE_SET (8, unsigned);
536 #undef FUNC_NAME
538 SCM_DEFINE (scm_r6rs_bytevector_s8_set_x, "bytevector-s8-set!", 3, 0, 0,
539 (SCM bv, SCM index, SCM value),
540 "Return the octet located at @var{index} in @var{bv}.")
541 #define FUNC_NAME s_scm_r6rs_bytevector_u8_set_x
543 INTEGER_NATIVE_SET (8, signed);
545 #undef FUNC_NAME
547 #undef OCTET_ACCESSOR_PROLOGUE
550 SCM_DEFINE (scm_r6rs_bytevector_to_u8_list, "bytevector->u8-list", 1, 0, 0,
551 (SCM bv),
552 "Return a newly allocated list of octets containing the "
553 "contents of @var{bv}.")
554 #define FUNC_NAME s_scm_r6rs_bytevector_to_u8_list
556 SCM lst, pair;
557 unsigned c_len, i;
558 unsigned char *c_bv;
560 SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv);
562 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);
563 c_bv = (unsigned char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
565 lst = scm_make_list (scm_from_uint (c_len), SCM_UNSPECIFIED);
566 for (i = 0, pair = lst;
567 i < c_len;
568 i++, pair = SCM_CDR (pair))
570 SCM_SETCAR (pair, SCM_I_MAKINUM (c_bv[i]));
573 return lst;
575 #undef FUNC_NAME
577 SCM_DEFINE (scm_r6rs_u8_list_to_bytevector, "u8-list->bytevector", 1, 0, 0,
578 (SCM lst),
579 "Turn @var{lst}, a list of octets, into a bytevector.")
580 #define FUNC_NAME s_scm_r6rs_u8_list_to_bytevector
582 SCM bv, item;
583 long c_len, i;
584 unsigned char *c_bv;
586 SCM_VALIDATE_LIST_COPYLEN (1, lst, c_len);
588 bv = make_bytevector (c_len);
589 c_bv = (unsigned char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
591 for (i = 0; i < c_len; lst = SCM_CDR (lst), i++)
593 item = SCM_CAR (lst);
595 if (SCM_LIKELY (SCM_I_INUMP (item)))
597 long c_item;
599 c_item = SCM_I_INUM (item);
600 if (SCM_LIKELY ((c_item >= 0) && (c_item < 256)))
601 c_bv[i] = (unsigned char) c_item;
602 else
603 goto type_error;
605 else
606 goto type_error;
609 return bv;
611 type_error:
612 scm_wrong_type_arg (FUNC_NAME, 1, item);
614 return SCM_BOOL_F;
616 #undef FUNC_NAME
618 /* Compute the two's complement of VALUE (a positive integer) on SIZE octets
619 using (2^(SIZE * 8) - VALUE). */
620 static inline void
621 twos_complement (mpz_t value, size_t size)
623 unsigned long bit_count;
625 /* We expect BIT_COUNT to fit in a unsigned long thanks to the range
626 checking on SIZE performed earlier. */
627 bit_count = (unsigned long) size << 3UL;
629 if (SCM_LIKELY (bit_count < sizeof (unsigned long)))
630 mpz_ui_sub (value, 1UL << bit_count, value);
631 else
633 mpz_t max;
635 mpz_init (max);
636 mpz_ui_pow_ui (max, 2, bit_count);
637 mpz_sub (value, max, value);
638 mpz_clear (max);
642 static inline SCM
643 bytevector_large_ref (const char *c_bv, size_t c_size, int signed_p,
644 SCM endianness)
646 SCM result;
647 mpz_t c_mpz;
648 int c_endianness, negative_p = 0;
650 if (signed_p)
652 if (scm_is_eq (endianness, scm_sym_big))
653 negative_p = c_bv[0] & 0x80;
654 else
655 negative_p = c_bv[c_size - 1] & 0x80;
658 c_endianness = scm_is_eq (endianness, scm_sym_big) ? 1 : -1;
660 mpz_init (c_mpz);
661 mpz_import (c_mpz, 1 /* 1 word */, 1 /* word order doesn't matter */,
662 c_size /* word is C_SIZE-byte long */,
663 c_endianness,
664 0 /* nails */, c_bv);
666 if (signed_p && negative_p)
668 twos_complement (c_mpz, c_size);
669 mpz_neg (c_mpz, c_mpz);
672 result = scm_from_mpz (c_mpz);
673 mpz_clear (c_mpz); /* FIXME: Needed? */
675 return result;
678 static inline int
679 bytevector_large_set (char *c_bv, size_t c_size, int signed_p,
680 SCM value, SCM endianness)
682 mpz_t c_mpz;
683 int c_endianness, c_sign, err = 0;
685 c_endianness = scm_is_eq (endianness, scm_sym_big) ? 1 : -1;
687 mpz_init (c_mpz);
688 scm_to_mpz (value, c_mpz);
690 c_sign = mpz_sgn (c_mpz);
691 if (c_sign < 0)
693 if (SCM_LIKELY (signed_p))
695 mpz_neg (c_mpz, c_mpz);
696 twos_complement (c_mpz, c_size);
698 else
700 err = -1;
701 goto finish;
705 if (c_sign == 0)
706 /* Zero. */
707 memset (c_bv, 0, c_size);
708 else
710 size_t word_count, value_size;
712 value_size = (mpz_sizeinbase (c_mpz, 2) + (8 * c_size)) / (8 * c_size);
713 if (SCM_UNLIKELY (value_size > c_size))
715 err = -2;
716 goto finish;
720 mpz_export (c_bv, &word_count, 1 /* word order doesn't matter */,
721 c_size, c_endianness,
722 0 /* nails */, c_mpz);
723 if (SCM_UNLIKELY (word_count != 1))
724 /* Shouldn't happen since we already checked with VALUE_SIZE. */
725 abort ();
728 finish:
729 mpz_clear (c_mpz);
731 return err;
/* Common prologue of the arbitrary-size accessors.  Declares C_LEN, C_INDEX,
   C_SIZE and C_BV, validates BV (argument 1) and converts INDEX and SIZE,
   then signals an out-of-range error when SIZE is unusable or when the
   C_SIZE octets starting at C_INDEX do not all lie within BV.  Expects `bv',
   `index', `size' and FUNC_NAME to be in scope.  */
#define GENERIC_INTEGER_ACCESSOR_PROLOGUE(_sign)                        \
  unsigned long c_len, c_index, c_size;                                 \
  char *c_bv;                                                           \
                                                                        \
  SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv);                                 \
  c_index = scm_to_ulong (index);                                       \
  c_size = scm_to_ulong (size);                                         \
                                                                        \
  c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);                              \
  c_bv = (char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv);                    \
                                                                        \
  /* C_SIZE must have its 3 higher bits set to zero so that             \
     multiplying it by 8 yields a number that fits in an                \
     unsigned long.  */                                                 \
  if (SCM_UNLIKELY ((c_size == 0) || (c_size >= (ULONG_MAX >> 3L))))    \
    scm_out_of_range (FUNC_NAME, size);                                 \
  /* Compare C_INDEX with `c_len - c_size' instead of computing         \
     `c_index + c_size', which wraps around for huge indices.  */       \
  if (SCM_UNLIKELY ((c_size > c_len) || (c_index > c_len - c_size)))    \
    scm_out_of_range (FUNC_NAME, index);
/* Body of an arbitrary-size integer reader.  Expects `c_bv', `c_size' and
   `endianness' to be in scope.  One- and two-octet integers are decoded by
   hand and returned as fixnums; anything of three octets or more goes
   through `bytevector_large_ref ()'.  A zero C_SIZE aborts.  */
#define GENERIC_INTEGER_REF(_sign)                                      \
  if (c_size >= 3)                                                      \
    return bytevector_large_ref ((char *) c_bv,                         \
                                 c_size, SIGNEDNESS (_sign),            \
                                 endianness);                           \
  else                                                                  \
    {                                                                   \
      _sign int c_small;                                                \
                                                                        \
      if (c_size == 1)                                                  \
        {                                                               \
          _sign char c_value8;                                          \
                                                                        \
          memcpy (&c_value8, c_bv, 1);                                  \
          c_small = c_value8;                                           \
        }                                                               \
      else if (c_size == 2)                                             \
        {                                                               \
          INT_TYPE (16, _sign) c_value16;                               \
                                                                        \
          memcpy (&c_value16, c_bv, 2);                                 \
          if (!scm_is_eq (endianness, native_endianness))               \
            c_small = (INT_TYPE (16, _sign)) bswap_16 (c_value16);      \
          else                                                          \
            c_small = c_value16;                                        \
        }                                                               \
      else                                                              \
        abort ();                                                       \
                                                                        \
      return SCM_I_MAKINUM ((_sign int) c_small);                       \
    }
796 static inline SCM
797 bytevector_signed_ref (const char *c_bv, size_t c_size, SCM endianness)
799 GENERIC_INTEGER_REF (signed);
802 static inline SCM
803 bytevector_unsigned_ref (const char *c_bv, size_t c_size, SCM endianness)
805 GENERIC_INTEGER_REF (unsigned);
/* Body of an arbitrary-size integer writer.  Expects `c_bv', `c_size',
   `value', `endianness' and FUNC_NAME to be in scope, and expands to the
   body of a `void' function.  One- and two-octet integers are encoded by
   hand; anything of three octets or more goes through
   `bytevector_large_set ()'.  A VALUE that cannot be stored raises an
   out-of-range error; a zero C_SIZE aborts.  */
#define GENERIC_INTEGER_SET(_sign)                                      \
  if (c_size < 3)                                                       \
    {                                                                   \
      /* Wide enough for any fixnum, like in INTEGER_SET: a plain       \
         `int' would truncate large fixnums before the range check      \
         and let them through.  */                                      \
      _sign long c_value;                                               \
                                                                        \
      if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                          \
        goto range_error;                                               \
                                                                        \
      c_value = SCM_I_INUM (value);                                     \
      switch (c_size)                                                   \
        {                                                               \
        case 1:                                                         \
          if (SCM_LIKELY (INT_VALID_P (8, _sign) (c_value)))            \
            {                                                           \
              _sign char c_value8;                                      \
              c_value8 = (_sign char) c_value;                          \
              memcpy (c_bv, &c_value8, 1);                              \
            }                                                           \
          else                                                          \
            goto range_error;                                           \
          break;                                                        \
                                                                        \
        case 2:                                                         \
          if (SCM_LIKELY (INT_VALID_P (16, _sign) (c_value)))           \
            {                                                           \
              int swap;                                                 \
              INT_TYPE (16, _sign) c_value16;                           \
                                                                        \
              swap = !scm_is_eq (endianness, native_endianness);        \
                                                                        \
              c_value16 = (INT_TYPE (16, _sign)) c_value;               \
              if (swap)                                                 \
                c_value16 =                                             \
                  (INT_TYPE (16, _sign)) bswap_16 (c_value16);          \
                                                                        \
              memcpy (c_bv, &c_value16, 2);                             \
            }                                                           \
          else                                                          \
            goto range_error;                                           \
          break;                                                        \
                                                                        \
        default:                                                        \
          abort ();                                                     \
        }                                                               \
    }                                                                   \
  else                                                                  \
    {                                                                   \
      int err;                                                          \
                                                                        \
      err = bytevector_large_set (c_bv, c_size,                         \
                                  SIGNEDNESS (_sign),                   \
                                  value, endianness);                   \
      if (err)                                                          \
        goto range_error;                                               \
    }                                                                   \
                                                                        \
  return;                                                               \
                                                                        \
 range_error:                                                           \
  scm_out_of_range (FUNC_NAME, value);                                  \
  return;
872 static inline void
873 bytevector_signed_set (char *c_bv, size_t c_size,
874 SCM value, SCM endianness,
875 const char *func_name)
876 #define FUNC_NAME func_name
878 GENERIC_INTEGER_SET (signed);
880 #undef FUNC_NAME
882 static inline void
883 bytevector_unsigned_set (char *c_bv, size_t c_size,
884 SCM value, SCM endianness,
885 const char *func_name)
886 #define FUNC_NAME func_name
888 GENERIC_INTEGER_SET (unsigned);
890 #undef FUNC_NAME
892 #undef GENERIC_INTEGER_SET
893 #undef GENERIC_INTEGER_REF
896 SCM_DEFINE (scm_r6rs_bytevector_uint_ref, "bytevector-uint-ref", 4, 0, 0,
897 (SCM bv, SCM index, SCM endianness, SCM size),
898 "Return the @var{size}-octet long unsigned integer at index "
899 "@var{index} in @var{bv}.")
900 #define FUNC_NAME s_scm_r6rs_bytevector_uint_ref
902 GENERIC_INTEGER_ACCESSOR_PROLOGUE (unsigned);
904 return (bytevector_unsigned_ref (&c_bv[c_index], c_size, endianness));
906 #undef FUNC_NAME
908 SCM_DEFINE (scm_r6rs_bytevector_sint_ref, "bytevector-sint-ref", 4, 0, 0,
909 (SCM bv, SCM index, SCM endianness, SCM size),
910 "Return the @var{size}-octet long unsigned integer at index "
911 "@var{index} in @var{bv}.")
912 #define FUNC_NAME s_scm_r6rs_bytevector_sint_ref
914 GENERIC_INTEGER_ACCESSOR_PROLOGUE (signed);
916 return (bytevector_signed_ref (&c_bv[c_index], c_size, endianness));
918 #undef FUNC_NAME
920 SCM_DEFINE (scm_r6rs_bytevector_uint_set_x, "bytevector-uint-set!", 5, 0, 0,
921 (SCM bv, SCM index, SCM value, SCM endianness, SCM size),
922 "Set the @var{size}-octet long unsigned integer at @var{index} "
923 "to @var{value}.")
924 #define FUNC_NAME s_scm_r6rs_bytevector_uint_set_x
926 GENERIC_INTEGER_ACCESSOR_PROLOGUE (unsigned);
928 bytevector_unsigned_set (&c_bv[c_index], c_size, value, endianness,
929 FUNC_NAME);
931 return SCM_UNSPECIFIED;
933 #undef FUNC_NAME
935 SCM_DEFINE (scm_r6rs_bytevector_sint_set_x, "bytevector-sint-set!", 5, 0, 0,
936 (SCM bv, SCM index, SCM value, SCM endianness, SCM size),
937 "Set the @var{size}-octet long signed integer at @var{index} "
938 "to @var{value}.")
939 #define FUNC_NAME s_scm_r6rs_bytevector_sint_set_x
941 GENERIC_INTEGER_ACCESSOR_PROLOGUE (signed);
943 bytevector_signed_set (&c_bv[c_index], c_size, value, endianness,
944 FUNC_NAME);
946 return SCM_UNSPECIFIED;
948 #undef FUNC_NAME
/* Operations on integers of arbitrary size.  */

/* Body of the `bytevector->{s,u}int-list' procedures.  Expects `bv',
   `endianness', `size' and FUNC_NAME to be in scope.  An empty bytevector
   yields the empty list; otherwise SIZE must be positive and no greater than
   the length of BV, and one integer is decoded for every complete group of
   SIZE octets.  */
#define INTEGERS_TO_LIST(_sign)                                         \
  SCM lst, pair;                                                        \
  size_t i, c_len, c_size;                                              \
                                                                        \
  SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv);                                 \
  SCM_VALIDATE_SYMBOL (2, endianness);                                  \
  c_size = scm_to_uint (size);                                          \
                                                                        \
  c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);                              \
  if (SCM_UNLIKELY (c_len == 0))                                        \
    lst = SCM_EOL;                                                      \
  /* A zero SIZE would divide by zero below and never advance the       \
     loop, so reject it along with sizes larger than BV.  */            \
  else if (SCM_UNLIKELY ((c_size == 0) || (c_len < c_size)))            \
    scm_out_of_range (FUNC_NAME, size);                                 \
  else                                                                  \
    {                                                                   \
      const char *c_bv;                                                 \
                                                                        \
      c_bv = (char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv);                \
                                                                        \
      lst = scm_make_list (scm_from_uint (c_len / c_size),              \
                           SCM_UNSPECIFIED);                            \
      for (i = 0, pair = lst;                                           \
           i <= c_len - c_size;                                         \
           i += c_size, c_bv += c_size, pair = SCM_CDR (pair))          \
        {                                                               \
          SCM_SETCAR (pair,                                             \
                      bytevector_ ## _sign ## _ref (c_bv, c_size,       \
                                                    endianness));       \
        }                                                               \
    }                                                                   \
                                                                        \
  return lst;
987 SCM_DEFINE (scm_r6rs_bytevector_to_sint_list, "bytevector->sint-list",
988 3, 0, 0,
989 (SCM bv, SCM endianness, SCM size),
990 "Return a list of signed integers of @var{size} octets "
991 "representing the contents of @var{bv}.")
992 #define FUNC_NAME s_scm_r6rs_bytevector_to_sint_list
994 INTEGERS_TO_LIST (signed);
996 #undef FUNC_NAME
998 SCM_DEFINE (scm_r6rs_bytevector_to_uint_list, "bytevector->uint-list",
999 3, 0, 0,
1000 (SCM bv, SCM endianness, SCM size),
1001 "Return a list of unsigned integers of @var{size} octets "
1002 "representing the contents of @var{bv}.")
1003 #define FUNC_NAME s_scm_r6rs_bytevector_to_uint_list
1005 INTEGERS_TO_LIST (unsigned);
1007 #undef FUNC_NAME
1009 #undef INTEGER_TO_LIST
/* Body of the `{s,u}int-list->bytevector' procedures.  Expects `lst',
   `endianness', `size' and FUNC_NAME to be in scope.  Allocates a bytevector
   of (length of LST) * SIZE octets and encodes each element of LST in turn;
   an element that does not fit raises an out-of-range error from the
   `bytevector_..._set ()' helper.  */
#define INTEGER_LIST_TO_BYTEVECTOR(_sign)                               \
  SCM bv;                                                               \
  long c_len;                                                           \
  size_t c_size;                                                        \
  char *c_bv, *c_bv_ptr;                                                \
                                                                        \
  SCM_VALIDATE_LIST_COPYLEN (1, lst, c_len);                            \
  SCM_VALIDATE_SYMBOL (2, endianness);                                  \
  c_size = scm_to_uint (size);                                          \
                                                                        \
  if (SCM_UNLIKELY ((c_size == 0) || (c_size >= (ULONG_MAX >> 3L))))    \
    scm_out_of_range (FUNC_NAME, size);                                 \
  /* `make_bytevector ()' takes an `unsigned': refuse a total size      \
     that does not fit instead of letting the product wrap around,      \
     which would under-allocate the buffer written to below.  */        \
  if (SCM_UNLIKELY ((size_t) c_len > ((unsigned) -1) / c_size))         \
    scm_out_of_range (FUNC_NAME, size);                                 \
                                                                        \
  bv = make_bytevector (c_len * c_size);                                \
  c_bv = (char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv);                    \
                                                                        \
  for (c_bv_ptr = c_bv;                                                 \
       !scm_is_null (lst);                                              \
       lst = SCM_CDR (lst), c_bv_ptr += c_size)                         \
    {                                                                   \
      bytevector_ ## _sign ## _set (c_bv_ptr, c_size,                   \
                                    SCM_CAR (lst), endianness,          \
                                    FUNC_NAME);                         \
    }                                                                   \
                                                                        \
  return bv;
1040 SCM_DEFINE (scm_r6rs_uint_list_to_bytevector, "uint-list->bytevector",
1041 3, 0, 0,
1042 (SCM lst, SCM endianness, SCM size),
1043 "Return a bytevector containing the unsigned integers "
1044 "listed in @var{lst} and encoded on @var{size} octets "
1045 "according to @var{endianness}.")
1046 #define FUNC_NAME s_scm_r6rs_uint_list_to_bytevector
1048 INTEGER_LIST_TO_BYTEVECTOR (unsigned);
1050 #undef FUNC_NAME
1052 SCM_DEFINE (scm_r6rs_sint_list_to_bytevector, "sint-list->bytevector",
1053 3, 0, 0,
1054 (SCM lst, SCM endianness, SCM size),
1055 "Return a bytevector containing the signed integers "
1056 "listed in @var{lst} and encoded on @var{size} octets "
1057 "according to @var{endianness}.")
1058 #define FUNC_NAME s_scm_r6rs_sint_list_to_bytevector
1060 INTEGER_LIST_TO_BYTEVECTOR (signed);
1062 #undef FUNC_NAME
1064 #undef INTEGER_LIST_TO_BYTEVECTOR
1068 /* Operations on 16-bit integers. */
1070 SCM_DEFINE (scm_r6rs_bytevector_u16_ref, "bytevector-u16-ref",
1071 3, 0, 0,
1072 (SCM bv, SCM index, SCM endianness),
1073 "Return the unsigned 16-bit integer from @var{bv} at "
1074 "@var{index}.")
1075 #define FUNC_NAME s_scm_r6rs_bytevector_u16_ref
1077 INTEGER_REF (16, unsigned);
1079 #undef FUNC_NAME
1081 SCM_DEFINE (scm_r6rs_bytevector_s16_ref, "bytevector-s16-ref",
1082 3, 0, 0,
1083 (SCM bv, SCM index, SCM endianness),
1084 "Return the signed 16-bit integer from @var{bv} at "
1085 "@var{index}.")
1086 #define FUNC_NAME s_scm_r6rs_bytevector_s16_ref
1088 INTEGER_REF (16, signed);
1090 #undef FUNC_NAME
1092 SCM_DEFINE (scm_r6rs_bytevector_u16_native_ref, "bytevector-u16-native-ref",
1093 2, 0, 0,
1094 (SCM bv, SCM index),
1095 "Return the unsigned 16-bit integer from @var{bv} at "
1096 "@var{index} using the native endianness.")
1097 #define FUNC_NAME s_scm_r6rs_bytevector_u16_native_ref
1099 INTEGER_NATIVE_REF (16, unsigned);
1101 #undef FUNC_NAME
1103 SCM_DEFINE (scm_r6rs_bytevector_s16_native_ref, "bytevector-s16-native-ref",
1104 2, 0, 0,
1105 (SCM bv, SCM index),
1106 "Return the unsigned 16-bit integer from @var{bv} at "
1107 "@var{index} using the native endianness.")
1108 #define FUNC_NAME s_scm_r6rs_bytevector_u16_native_ref
1110 INTEGER_NATIVE_REF (16, signed);
1112 #undef FUNC_NAME
1114 SCM_DEFINE (scm_r6rs_bytevector_u16_set_x, "bytevector-u16-set!",
1115 4, 0, 0,
1116 (SCM bv, SCM index, SCM value, SCM endianness),
1117 "Store @var{value} in @var{bv} at @var{index} according to "
1118 "@var{endianness}.")
1119 #define FUNC_NAME s_scm_r6rs_bytevector_u16_set_x
1121 INTEGER_SET (16, unsigned);
1123 #undef FUNC_NAME
1125 SCM_DEFINE (scm_r6rs_bytevector_s16_set_x, "bytevector-s16-set!",
1126 4, 0, 0,
1127 (SCM bv, SCM index, SCM value, SCM endianness),
1128 "Store @var{value} in @var{bv} at @var{index} according to "
1129 "@var{endianness}.")
1130 #define FUNC_NAME s_scm_r6rs_bytevector_s16_set_x
1132 INTEGER_SET (16, signed);
1134 #undef FUNC_NAME
1136 SCM_DEFINE (scm_r6rs_bytevector_u16_native_set_x, "bytevector-u16-native-set!",
1137 3, 0, 0,
1138 (SCM bv, SCM index, SCM value),
1139 "Store the unsigned integer @var{value} at index @var{index} "
1140 "of @var{bv} using the native endianness.")
1141 #define FUNC_NAME s_scm_r6rs_bytevector_u16_native_set_x
1143 INTEGER_NATIVE_SET (16, unsigned);
1145 #undef FUNC_NAME
1147 SCM_DEFINE (scm_r6rs_bytevector_s16_native_set_x, "bytevector-s16-native-set!",
1148 3, 0, 0,
1149 (SCM bv, SCM index, SCM value),
1150 "Store the signed integer @var{value} at index @var{index} "
1151 "of @var{bv} using the native endianness.")
1152 #define FUNC_NAME s_scm_r6rs_bytevector_u16_native_set_x
1154 INTEGER_NATIVE_SET (16, signed);
1156 #undef FUNC_NAME
/* Operations on 32-bit integers.  */

/* Unfortunately, on 32-bit machines `SCM' is not large enough to hold
   arbitrary 32-bit integers.  Thus we fall back to using the
   `large_{ref,set}' variants on 32-bit machines.  */

/* Reader with an explicit ENDIANNESS argument, built on
   `bytevector_large_ref ()'.  */
#define LARGE_INTEGER_REF(_len, _sign)                                  \
  INTEGER_ACCESSOR_PROLOGUE(_len, _sign);                               \
  SCM_VALIDATE_SYMBOL (3, endianness);                                  \
                                                                        \
  {                                                                     \
    const char *c_start = (char *) c_bv + c_index;                      \
                                                                        \
    return bytevector_large_ref (c_start, _len / 8,                     \
                                 SIGNEDNESS (_sign), endianness);       \
  }
/* Writer with an explicit ENDIANNESS argument, built on
   `bytevector_large_set ()'.  Any error reported by the latter is turned
   into an out-of-range error on VALUE.  */
#define LARGE_INTEGER_SET(_len, _sign)                                  \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
  SCM_VALIDATE_SYMBOL (4, endianness);                                  \
                                                                        \
  {                                                                     \
    int c_status;                                                       \
                                                                        \
    c_status = bytevector_large_set ((char *) c_bv + c_index,           \
                                     _len / 8, SIGNEDNESS (_sign),      \
                                     value, endianness);                \
    if (SCM_UNLIKELY (c_status))                                        \
      scm_out_of_range (FUNC_NAME, value);                              \
  }                                                                     \
                                                                        \
  return SCM_UNSPECIFIED;
/* Native-endianness reader built on `bytevector_large_ref ()'.  */
#define LARGE_INTEGER_NATIVE_REF(_len, _sign)                           \
  INTEGER_ACCESSOR_PROLOGUE(_len, _sign);                               \
                                                                        \
  {                                                                     \
    const char *c_start = (char *) c_bv + c_index;                      \
                                                                        \
    return bytevector_large_ref (c_start, _len / 8,                     \
                                 SIGNEDNESS (_sign),                    \
                                 native_endianness);                    \
  }
/* Native-endianness writer built on `bytevector_large_set ()'.  Any error
   reported by the latter is turned into an out-of-range error on VALUE.  */
#define LARGE_INTEGER_NATIVE_SET(_len, _sign)                           \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
                                                                        \
  {                                                                     \
    int c_status;                                                       \
                                                                        \
    c_status = bytevector_large_set ((char *) c_bv + c_index,           \
                                     _len / 8, SIGNEDNESS (_sign),      \
                                     value, native_endianness);         \
    if (SCM_UNLIKELY (c_status))                                        \
      scm_out_of_range (FUNC_NAME, value);                              \
  }                                                                     \
                                                                        \
  return SCM_UNSPECIFIED;
1203 SCM_DEFINE (scm_r6rs_bytevector_u32_ref, "bytevector-u32-ref",
1204 3, 0, 0,
1205 (SCM bv, SCM index, SCM endianness),
1206 "Return the unsigned 32-bit integer from @var{bv} at "
1207 "@var{index}.")
1208 #define FUNC_NAME s_scm_r6rs_bytevector_u32_ref
1210 #if SIZEOF_VOID_P > 4
1211 INTEGER_REF (32, unsigned);
1212 #else
1213 LARGE_INTEGER_REF (32, unsigned);
1214 #endif
1216 #undef FUNC_NAME
1218 SCM_DEFINE (scm_r6rs_bytevector_s32_ref, "bytevector-s32-ref",
1219 3, 0, 0,
1220 (SCM bv, SCM index, SCM endianness),
1221 "Return the signed 32-bit integer from @var{bv} at "
1222 "@var{index}.")
1223 #define FUNC_NAME s_scm_r6rs_bytevector_s32_ref
1225 #if SIZEOF_VOID_P > 4
1226 INTEGER_REF (32, signed);
1227 #else
1228 LARGE_INTEGER_REF (32, signed);
1229 #endif
1231 #undef FUNC_NAME
1233 SCM_DEFINE (scm_r6rs_bytevector_u32_native_ref, "bytevector-u32-native-ref",
1234 2, 0, 0,
1235 (SCM bv, SCM index),
1236 "Return the unsigned 32-bit integer from @var{bv} at "
1237 "@var{index} using the native endianness.")
1238 #define FUNC_NAME s_scm_r6rs_bytevector_u32_native_ref
1240 #if SIZEOF_VOID_P > 4
1241 INTEGER_NATIVE_REF (32, unsigned);
1242 #else
1243 LARGE_INTEGER_NATIVE_REF (32, unsigned);
1244 #endif
1246 #undef FUNC_NAME
1248 SCM_DEFINE (scm_r6rs_bytevector_s32_native_ref, "bytevector-s32-native-ref",
1249 2, 0, 0,
1250 (SCM bv, SCM index),
1251 "Return the unsigned 32-bit integer from @var{bv} at "
1252 "@var{index} using the native endianness.")
1253 #define FUNC_NAME s_scm_r6rs_bytevector_u32_native_ref
1255 #if SIZEOF_VOID_P > 4
1256 INTEGER_NATIVE_REF (32, signed);
1257 #else
1258 LARGE_INTEGER_NATIVE_REF (32, signed);
1259 #endif
1261 #undef FUNC_NAME
1263 SCM_DEFINE (scm_r6rs_bytevector_u32_set_x, "bytevector-u32-set!",
1264 4, 0, 0,
1265 (SCM bv, SCM index, SCM value, SCM endianness),
1266 "Store @var{value} in @var{bv} at @var{index} according to "
1267 "@var{endianness}.")
1268 #define FUNC_NAME s_scm_r6rs_bytevector_u32_set_x
1270 #if SIZEOF_VOID_P > 4
1271 INTEGER_SET (32, unsigned);
1272 #else
1273 LARGE_INTEGER_SET (32, unsigned);
1274 #endif
1276 #undef FUNC_NAME
1278 SCM_DEFINE (scm_r6rs_bytevector_s32_set_x, "bytevector-s32-set!",
1279 4, 0, 0,
1280 (SCM bv, SCM index, SCM value, SCM endianness),
1281 "Store @var{value} in @var{bv} at @var{index} according to "
1282 "@var{endianness}.")
1283 #define FUNC_NAME s_scm_r6rs_bytevector_s32_set_x
1285 #if SIZEOF_VOID_P > 4
1286 INTEGER_SET (32, signed);
1287 #else
1288 LARGE_INTEGER_SET (32, signed);
1289 #endif
1291 #undef FUNC_NAME
1293 SCM_DEFINE (scm_r6rs_bytevector_u32_native_set_x, "bytevector-u32-native-set!",
1294 3, 0, 0,
1295 (SCM bv, SCM index, SCM value),
1296 "Store the unsigned integer @var{value} at index @var{index} "
1297 "of @var{bv} using the native endianness.")
1298 #define FUNC_NAME s_scm_r6rs_bytevector_u32_native_set_x
1300 #if SIZEOF_VOID_P > 4
1301 INTEGER_NATIVE_SET (32, unsigned);
1302 #else
1303 LARGE_INTEGER_NATIVE_SET (32, unsigned);
1304 #endif
1306 #undef FUNC_NAME
1308 SCM_DEFINE (scm_r6rs_bytevector_s32_native_set_x, "bytevector-s32-native-set!",
1309 3, 0, 0,
1310 (SCM bv, SCM index, SCM value),
1311 "Store the signed integer @var{value} at index @var{index} "
1312 "of @var{bv} using the native endianness.")
1313 #define FUNC_NAME s_scm_r6rs_bytevector_u32_native_set_x
1315 #if SIZEOF_VOID_P > 4
1316 INTEGER_NATIVE_SET (32, signed);
1317 #else
1318 LARGE_INTEGER_NATIVE_SET (32, signed);
1319 #endif
1321 #undef FUNC_NAME
1325 /* Operations on 64-bit integers. */
1327 /* For 64-bit integers, we use only the `large_{ref,set}' variant. */
1329 SCM_DEFINE (scm_r6rs_bytevector_u64_ref, "bytevector-u64-ref",
1330 3, 0, 0,
1331 (SCM bv, SCM index, SCM endianness),
1332 "Return the unsigned 64-bit integer from @var{bv} at "
1333 "@var{index}.")
1334 #define FUNC_NAME s_scm_r6rs_bytevector_u64_ref
1336 LARGE_INTEGER_REF (64, unsigned);
1338 #undef FUNC_NAME
1340 SCM_DEFINE (scm_r6rs_bytevector_s64_ref, "bytevector-s64-ref",
1341 3, 0, 0,
1342 (SCM bv, SCM index, SCM endianness),
1343 "Return the signed 64-bit integer from @var{bv} at "
1344 "@var{index}.")
1345 #define FUNC_NAME s_scm_r6rs_bytevector_s64_ref
1347 LARGE_INTEGER_REF (64, signed);
1349 #undef FUNC_NAME
1351 SCM_DEFINE (scm_r6rs_bytevector_u64_native_ref, "bytevector-u64-native-ref",
1352 2, 0, 0,
1353 (SCM bv, SCM index),
1354 "Return the unsigned 64-bit integer from @var{bv} at "
1355 "@var{index} using the native endianness.")
1356 #define FUNC_NAME s_scm_r6rs_bytevector_u64_native_ref
1358 LARGE_INTEGER_NATIVE_REF (64, unsigned);
1360 #undef FUNC_NAME
1362 SCM_DEFINE (scm_r6rs_bytevector_s64_native_ref, "bytevector-s64-native-ref",
1363 2, 0, 0,
1364 (SCM bv, SCM index),
1365 "Return the unsigned 64-bit integer from @var{bv} at "
1366 "@var{index} using the native endianness.")
1367 #define FUNC_NAME s_scm_r6rs_bytevector_u64_native_ref
1369 LARGE_INTEGER_NATIVE_REF (64, signed);
1371 #undef FUNC_NAME
1373 SCM_DEFINE (scm_r6rs_bytevector_u64_set_x, "bytevector-u64-set!",
1374 4, 0, 0,
1375 (SCM bv, SCM index, SCM value, SCM endianness),
1376 "Store @var{value} in @var{bv} at @var{index} according to "
1377 "@var{endianness}.")
1378 #define FUNC_NAME s_scm_r6rs_bytevector_u64_set_x
1380 LARGE_INTEGER_SET (64, unsigned);
1382 #undef FUNC_NAME
1384 SCM_DEFINE (scm_r6rs_bytevector_s64_set_x, "bytevector-s64-set!",
1385 4, 0, 0,
1386 (SCM bv, SCM index, SCM value, SCM endianness),
1387 "Store @var{value} in @var{bv} at @var{index} according to "
1388 "@var{endianness}.")
1389 #define FUNC_NAME s_scm_r6rs_bytevector_s64_set_x
1391 LARGE_INTEGER_SET (64, signed);
1393 #undef FUNC_NAME
1395 SCM_DEFINE (scm_r6rs_bytevector_u64_native_set_x, "bytevector-u64-native-set!",
1396 3, 0, 0,
1397 (SCM bv, SCM index, SCM value),
1398 "Store the unsigned integer @var{value} at index @var{index} "
1399 "of @var{bv} using the native endianness.")
1400 #define FUNC_NAME s_scm_r6rs_bytevector_u64_native_set_x
1402 LARGE_INTEGER_NATIVE_SET (64, unsigned);
1404 #undef FUNC_NAME
1406 SCM_DEFINE (scm_r6rs_bytevector_s64_native_set_x, "bytevector-s64-native-set!",
1407 3, 0, 0,
1408 (SCM bv, SCM index, SCM value),
1409 "Store the signed integer @var{value} at index @var{index} "
1410 "of @var{bv} using the native endianness.")
1411 #define FUNC_NAME s_scm_r6rs_bytevector_u64_native_set_x
1413 LARGE_INTEGER_NATIVE_SET (64, signed);
1415 #undef FUNC_NAME
1419 /* Operations on IEEE-754 numbers. */
1421 /* There are two possible word endians, visible in glibc's <ieee754.h>.
1422 However, in R6RS, when the endianness is `little', little endian is
1423 assumed for both the byte order and the word order. This is clear from
1424 Section 2.1 of R6RS-lib (in response to
1425 http://www.r6rs.org/formal-comments/comment-187.txt). */
1428 /* Convert to/from a floating-point number with different endianness. This
1429 method is probably not the most efficient but it should be portable. */
1431 static inline void
1432 float_to_foreign_endianness (union scm_r6rs_ieee754_float *target,
1433 float source)
1435 union scm_r6rs_ieee754_float src;
1437 src.f = source;
1439 #ifdef WORDS_BIGENDIAN
1440 /* Assuming little endian for both byte and word order. */
1441 target->little_endian.negative = src.big_endian.negative;
1442 target->little_endian.exponent = src.big_endian.exponent;
1443 target->little_endian.mantissa = src.big_endian.mantissa;
1444 #else
1445 target->big_endian.negative = src.little_endian.negative;
1446 target->big_endian.exponent = src.little_endian.exponent;
1447 target->big_endian.mantissa = src.little_endian.mantissa;
1448 #endif
1451 static inline float
1452 float_from_foreign_endianness (const union scm_r6rs_ieee754_float *source)
1454 union scm_r6rs_ieee754_float result;
1456 #ifdef WORDS_BIGENDIAN
1457 /* Assuming little endian for both byte and word order. */
1458 result.big_endian.negative = source->little_endian.negative;
1459 result.big_endian.exponent = source->little_endian.exponent;
1460 result.big_endian.mantissa = source->little_endian.mantissa;
1461 #else
1462 result.little_endian.negative = source->big_endian.negative;
1463 result.little_endian.exponent = source->big_endian.exponent;
1464 result.little_endian.mantissa = source->big_endian.mantissa;
1465 #endif
1467 return (result.f);
1470 static inline void
1471 double_to_foreign_endianness (union scm_r6rs_ieee754_double *target,
1472 double source)
1474 union scm_r6rs_ieee754_double src;
1476 src.d = source;
1478 #ifdef WORDS_BIGENDIAN
1479 /* Assuming little endian for both byte and word order. */
1480 target->little_little_endian.negative = src.big_endian.negative;
1481 target->little_little_endian.exponent = src.big_endian.exponent;
1482 target->little_little_endian.mantissa0 = src.big_endian.mantissa0;
1483 target->little_little_endian.mantissa1 = src.big_endian.mantissa1;
1484 #else
1485 target->big_endian.negative = src.little_little_endian.negative;
1486 target->big_endian.exponent = src.little_little_endian.exponent;
1487 target->big_endian.mantissa0 = src.little_little_endian.mantissa0;
1488 target->big_endian.mantissa1 = src.little_little_endian.mantissa1;
1489 #endif
1492 static inline double
1493 double_from_foreign_endianness (const union scm_r6rs_ieee754_double *source)
1495 union scm_r6rs_ieee754_double result;
1497 #ifdef WORDS_BIGENDIAN
1498 /* Assuming little endian for both byte and word order. */
1499 result.big_endian.negative = source->little_little_endian.negative;
1500 result.big_endian.exponent = source->little_little_endian.exponent;
1501 result.big_endian.mantissa0 = source->little_little_endian.mantissa0;
1502 result.big_endian.mantissa1 = source->little_little_endian.mantissa1;
1503 #else
1504 result.little_little_endian.negative = source->big_endian.negative;
1505 result.little_little_endian.exponent = source->big_endian.exponent;
1506 result.little_little_endian.mantissa0 = source->big_endian.mantissa0;
1507 result.little_little_endian.mantissa1 = source->big_endian.mantissa1;
1508 #endif
1510 return (result.d);
/* Template macros that let the accessors below be written once for both
   `float' and `double'.  _C_TYPE is the C type name and is token-pasted
   into the union and helper-function names.
   XXX: Guile can only convert to/from doubles, so the two SCM conversion
   macros ignore _C_TYPE.  */
#define IEEE754_UNION(_c_type)    union scm_r6rs_ieee754_ ## _c_type
#define IEEE754_TO_SCM(_c_type)   scm_from_double
#define IEEE754_FROM_SCM(_c_type) scm_to_double
#define IEEE754_FROM_FOREIGN_ENDIANNESS(_c_type) _c_type ## _from_foreign_endianness
#define IEEE754_TO_FOREIGN_ENDIANNESS(_c_type) _c_type ## _to_foreign_endianness
/* Template getters and setters.  */

/* Common prologue of the IEEE-754 accessors: reuse the integer accessor
   prologue with the bit width of _TYPE (its size in bytes shifted left by
   3).  No trailing semicolon here: every use site supplies its own.  */
#define IEEE754_ACCESSOR_PROLOGUE(_type) \
  INTEGER_ACCESSOR_PROLOGUE (sizeof (_type) << 3UL, signed)
/* Body of an IEEE-754 `ref' procedure for C type _TYPE.  Must be expanded
   inside a function whose parameters are named `bv', `index' and
   `endianness'.  The bytes at `c_index' are copied out verbatim when
   ENDIANNESS is the native one, and converted field by field otherwise.  */
#define IEEE754_REF(_type) \
  _type c_result; \
 \
  IEEE754_ACCESSOR_PROLOGUE (_type); \
  SCM_VALIDATE_SYMBOL (3, endianness); \
 \
  if (!scm_is_eq (endianness, native_endianness)) \
    { \
      /* Foreign layout: load the raw union, then convert it.  */ \
      IEEE754_UNION (_type) c_raw; \
 \
      memcpy (&c_raw, &c_bv[c_index], sizeof (c_raw)); \
      c_result = IEEE754_FROM_FOREIGN_ENDIANNESS (_type) (&c_raw); \
    } \
  else \
    memcpy (&c_result, &c_bv[c_index], sizeof (c_result)); \
 \
  return (IEEE754_TO_SCM (_type) (c_result));
/* Body of an IEEE-754 native-endianness `ref' procedure for C type _TYPE.
   Must be expanded inside a function whose parameters are named `bv' and
   `index'.  The bytes at `c_index' are copied into a _TYPE object and
   converted to an SCM number.  */
#define IEEE754_NATIVE_REF(_type) \
  _type c_result; \
 \
  IEEE754_ACCESSOR_PROLOGUE (_type); \
 \
  memcpy (&c_result, &c_bv[c_index], sizeof (c_result)); \
  return (IEEE754_TO_SCM (_type) (c_result));
/* Body of an IEEE-754 `set!' procedure for C type _TYPE.  Must be expanded
   inside a function whose parameters are named `bv', `index', `value' and
   `endianness'.  VALUE must be a real; it is stored verbatim when
   ENDIANNESS is the native one, and converted field by field otherwise.  */
#define IEEE754_SET(_type) \
  _type c_value; \
 \
  IEEE754_ACCESSOR_PROLOGUE (_type); \
  SCM_VALIDATE_REAL (3, value); \
  SCM_VALIDATE_SYMBOL (4, endianness); \
  c_value = IEEE754_FROM_SCM (_type) (value); \
 \
  if (!scm_is_eq (endianness, native_endianness)) \
    { \
      /* Foreign layout: convert into a raw union, then store it.  */ \
      IEEE754_UNION (_type) c_raw; \
 \
      IEEE754_TO_FOREIGN_ENDIANNESS (_type) (&c_raw, c_value); \
      memcpy (&c_bv[c_index], &c_raw, sizeof (c_raw)); \
    } \
  else \
    memcpy (&c_bv[c_index], &c_value, sizeof (c_value)); \
 \
  return SCM_UNSPECIFIED;
/* Body of an IEEE-754 native-endianness `set!' procedure for C type _TYPE.
   Must be expanded inside a function whose parameters are named `bv',
   `index' and `value'.  VALUE must be a real; its C representation is
   copied into the bytevector at `c_index'.  */
#define IEEE754_NATIVE_SET(_type) \
  _type c_value; \
 \
  IEEE754_ACCESSOR_PROLOGUE (_type); \
  SCM_VALIDATE_REAL (3, value); \
  c_value = IEEE754_FROM_SCM (_type) (value); \
 \
  memcpy (&c_bv[c_index], &c_value, sizeof (c_value)); \
  return SCM_UNSPECIFIED;
1587 /* Single precision. */
1589 SCM_DEFINE (scm_r6rs_bytevector_ieee_single_ref,
1590 "bytevector-ieee-single-ref",
1591 3, 0, 0,
1592 (SCM bv, SCM index, SCM endianness),
1593 "Return the IEEE-754 single from @var{bv} at "
1594 "@var{index}.")
1595 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_single_ref
1597 IEEE754_REF (float);
1599 #undef FUNC_NAME
1601 SCM_DEFINE (scm_r6rs_bytevector_ieee_single_native_ref,
1602 "bytevector-ieee-single-native-ref",
1603 2, 0, 0,
1604 (SCM bv, SCM index),
1605 "Return the IEEE-754 single from @var{bv} at "
1606 "@var{index} using the native endianness.")
1607 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_single_native_ref
1609 IEEE754_NATIVE_REF (float);
1611 #undef FUNC_NAME
1613 SCM_DEFINE (scm_r6rs_bytevector_ieee_single_set_x,
1614 "bytevector-ieee-single-set!",
1615 4, 0, 0,
1616 (SCM bv, SCM index, SCM value, SCM endianness),
1617 "Store real @var{value} in @var{bv} at @var{index} according to "
1618 "@var{endianness}.")
1619 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_single_set_x
1621 IEEE754_SET (float);
1623 #undef FUNC_NAME
1625 SCM_DEFINE (scm_r6rs_bytevector_ieee_single_native_set_x,
1626 "bytevector-ieee-single-native-set!",
1627 3, 0, 0,
1628 (SCM bv, SCM index, SCM value),
1629 "Store the real @var{value} at index @var{index} "
1630 "of @var{bv} using the native endianness.")
1631 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_single_native_set_x
1633 IEEE754_NATIVE_SET (float);
1635 #undef FUNC_NAME
1638 /* Double precision. */
1640 SCM_DEFINE (scm_r6rs_bytevector_ieee_double_ref,
1641 "bytevector-ieee-double-ref",
1642 3, 0, 0,
1643 (SCM bv, SCM index, SCM endianness),
1644 "Return the IEEE-754 double from @var{bv} at "
1645 "@var{index}.")
1646 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_double_ref
1648 IEEE754_REF (double);
1650 #undef FUNC_NAME
1652 SCM_DEFINE (scm_r6rs_bytevector_ieee_double_native_ref,
1653 "bytevector-ieee-double-native-ref",
1654 2, 0, 0,
1655 (SCM bv, SCM index),
1656 "Return the IEEE-754 double from @var{bv} at "
1657 "@var{index} using the native endianness.")
1658 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_double_native_ref
1660 IEEE754_NATIVE_REF (double);
1662 #undef FUNC_NAME
1664 SCM_DEFINE (scm_r6rs_bytevector_ieee_double_set_x,
1665 "bytevector-ieee-double-set!",
1666 4, 0, 0,
1667 (SCM bv, SCM index, SCM value, SCM endianness),
1668 "Store real @var{value} in @var{bv} at @var{index} according to "
1669 "@var{endianness}.")
1670 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_double_set_x
1672 IEEE754_SET (double);
1674 #undef FUNC_NAME
1676 SCM_DEFINE (scm_r6rs_bytevector_ieee_double_native_set_x,
1677 "bytevector-ieee-double-native-set!",
1678 3, 0, 0,
1679 (SCM bv, SCM index, SCM value),
1680 "Store the real @var{value} at index @var{index} "
1681 "of @var{bv} using the native endianness.")
1682 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_double_native_set_x
1684 IEEE754_NATIVE_SET (double);
1686 #undef FUNC_NAME
/* The IEEE-754 template macros are private to the section above; drop all
   of them, including the accessor prologue, so that none leaks into the
   rest of the file.  */
#undef IEEE754_UNION
#undef IEEE754_TO_SCM
#undef IEEE754_FROM_SCM
#undef IEEE754_FROM_FOREIGN_ENDIANNESS
#undef IEEE754_TO_FOREIGN_ENDIANNESS
#undef IEEE754_ACCESSOR_PROLOGUE
#undef IEEE754_REF
#undef IEEE754_NATIVE_REF
#undef IEEE754_SET
#undef IEEE754_NATIVE_SET
1700 /* Operations on strings. */
/* Define `utfN_strlen', where N is _UTF_WIDTH: it scans STR up to (not
   including) the first zero code unit and returns the number of code units
   seen multiplied by the code unit size in bytes.  */
#define UTF_STRLEN_FUNCTION(_utf_width) \
static inline size_t \
utf ## _utf_width ## _strlen (const uint ## _utf_width ## _t *str) \
{ \
  const uint ## _utf_width ## _t *end = str; \
 \
  while (*end != 0) \
    end++; \
 \
  return ((size_t) (end - str) * ((_utf_width) / 8)); \
}

UTF_STRLEN_FUNCTION (8)
/* Expand to a call of the `utfN_strlen' function generated by
   UTF_STRLEN_FUNCTION for N = _UTF_WIDTH, i.e. the length in bytes of the
   zero-terminated UTF-(_UTF_WIDTH) string _STR.  */
#define UTF_STRLEN(_utf_width, _str) utf ## _utf_width ## _strlen (_str)
1727 /* Return the "portable" name of the UTF encoding of size UTF_WIDTH and
1728 ENDIANNESS (Gnulib's `iconv_open' module guarantees the portability of the
1729 encoding name). */
1730 static inline void
1731 utf_encoding_name (char *name, size_t utf_width, SCM endianness)
1733 strcpy (name, "UTF-");
1734 strcat (name, ((utf_width == 8)
1735 ? "8"
1736 : ((utf_width == 16)
1737 ? "16"
1738 : ((utf_width == 32)
1739 ? "32"
1740 : "??"))));
1741 strcat (name,
1742 ((scm_is_eq (endianness, scm_sym_big))
1743 ? "BE"
1744 : ((scm_is_eq (endianness, scm_sym_little))
1745 ? "LE"
1746 : "unknown")));
/* Size, in bytes, of the buffers that receive a UTF encoding name.  The
   longest name `utf_encoding_name' can produce is "UTF-??unknown", which
   needs 14 bytes including the terminating null byte.  */
#define MAX_UTF_ENCODING_NAME_LEN 16
/* Produce the body of a `string->utf' function for UTF-(_UTF_WIDTH).  Must
   be expanded inside a function whose parameters are named `str' and
   `endianness'; ENDIANNESS defaults to `big' when unbound.  STR is first
   extracted in the locale encoding, then converted with `mem_iconveh'
   (unconvertible characters become question marks), and the result is
   returned as a bytevector.  */
#define STRING_TO_UTF(_utf_width) \
  SCM utf; \
  int err; \
  char *c_str; \
  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN]; \
  char *c_utf = NULL, *c_locale; \
  size_t c_strlen, c_buf_len, c_raw_strlen, c_utf_len = 0; \
 \
  SCM_VALIDATE_STRING (1, str); \
  if (endianness == SCM_UNDEFINED) \
    endianness = scm_sym_big; \
  else \
    SCM_VALIDATE_SYMBOL (2, endianness); \
 \
  c_strlen = scm_c_string_length (str); \
  c_buf_len = c_strlen * ((_utf_width) / 8); \
  for (;;) \
    { \
      /* `scm_to_locale_stringbuf' returns the number of bytes needed for \
         the whole string; pass it the real buffer capacity and retry \
         with a larger buffer only when that capacity was too small.  */ \
      c_str = (char *) alloca (c_buf_len + 1); \
      c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_buf_len); \
      if (c_raw_strlen <= c_buf_len) \
        break; \
      c_buf_len = c_raw_strlen; \
    } \
  c_str[c_raw_strlen] = '\0'; \
 \
  utf_encoding_name (c_utf_name, (_utf_width), endianness); \
 \
  /* Keep a private copy of the locale charset name.  */ \
  c_locale = (char *) alloca (strlen (locale_charset ()) + 1); \
  strcpy (c_locale, locale_charset ()); \
 \
  err = mem_iconveh (c_str, c_raw_strlen, \
                     c_locale, c_utf_name, \
                     iconveh_question_mark, NULL, \
                     &c_utf, &c_utf_len); \
  if (SCM_UNLIKELY (err)) \
    /* NOTE(review): ERR is `mem_iconveh's return value, not an errno \
       code; confirm whether `errno' should be passed here instead.  */ \
    scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A", \
                      scm_list_1 (str), err); \
  else \
    /* Wrap the C_UTF_LEN converted bytes in a bytevector.  */ \
    utf = scm_r6rs_c_take_bytevector ((signed char *) c_utf, \
                                      c_utf_len); \
 \
  return (utf);
1798 SCM_DEFINE (scm_r6rs_string_to_utf8, "string->utf8",
1799 1, 0, 0,
1800 (SCM str),
1801 "Return a newly allocated bytevector that contains the UTF-8 "
1802 "encoding of @var{str}.")
1803 #define FUNC_NAME s_scm_r6rs_string_to_utf8
1805 SCM utf;
1806 char *c_str;
1807 uint8_t *c_utf;
1808 size_t c_strlen, c_raw_strlen;
1810 SCM_VALIDATE_STRING (1, str);
1812 c_strlen = scm_c_string_length (str);
1813 c_raw_strlen = c_strlen;
1816 c_str = (char *) alloca (c_raw_strlen + 1);
1817 c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_strlen);
1819 while (c_raw_strlen > c_strlen);
1820 c_str[c_raw_strlen] = '\0';
1822 c_utf = u8_strconv_from_locale (c_str);
1823 if (SCM_UNLIKELY (c_utf == NULL))
1824 scm_syserror (FUNC_NAME);
1825 else
1826 /* C_UTF is null-terminated. */
1827 utf = scm_r6rs_c_take_bytevector ((signed char *) c_utf,
1828 UTF_STRLEN (8, c_utf));
1830 return (utf);
1832 #undef FUNC_NAME
1834 SCM_DEFINE (scm_r6rs_string_to_utf16, "string->utf16",
1835 1, 1, 0,
1836 (SCM str, SCM endianness),
1837 "Return a newly allocated bytevector that contains the UTF-16 "
1838 "encoding of @var{str}.")
1839 #define FUNC_NAME s_scm_r6rs_string_to_utf16
1841 STRING_TO_UTF (16);
1843 #undef FUNC_NAME
1845 SCM_DEFINE (scm_r6rs_string_to_utf32, "string->utf32",
1846 1, 1, 0,
1847 (SCM str, SCM endianness),
1848 "Return a newly allocated bytevector that contains the UTF-32 "
1849 "encoding of @var{str}.")
1850 #define FUNC_NAME s_scm_r6rs_string_to_utf32
1852 STRING_TO_UTF (32);
1854 #undef FUNC_NAME
/* Produce the body of a `utfN->string' function for N = _UTF_WIDTH.  Must
   be expanded inside a function whose parameters are named `utf' and
   `endianness'; ENDIANNESS defaults to `big' when unbound.  The contents of
   bytevector UTF are converted to the locale encoding with `mem_iconveh'
   (unconvertible characters become question marks) and returned as a
   string.  */
#define UTF_TO_STRING(_utf_width) \
  SCM str = SCM_BOOL_F; \
  int err; \
  char *c_str = NULL, *c_locale; \
  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN]; \
  const char *c_utf; \
  size_t c_strlen = 0, c_utf_len; \
 \
  SCM_VALIDATE_R6RS_BYTEVECTOR (1, utf); \
  if (endianness == SCM_UNDEFINED) \
    endianness = scm_sym_big; \
  else \
    SCM_VALIDATE_SYMBOL (2, endianness); \
 \
  /* Source buffer and the name of its encoding.  */ \
  c_utf_len = SCM_R6RS_BYTEVECTOR_LENGTH (utf); \
  c_utf = (char *) SCM_R6RS_BYTEVECTOR_CONTENTS (utf); \
  utf_encoding_name (c_utf_name, (_utf_width), endianness); \
 \
  /* Keep a private copy of the locale charset name.  */ \
  c_locale = (char *) alloca (strlen (locale_charset ()) + 1); \
  strcpy (c_locale, locale_charset ()); \
 \
  err = mem_iconveh (c_utf, c_utf_len, \
                     c_utf_name, c_locale, \
                     iconveh_question_mark, NULL, \
                     &c_str, &c_strlen); \
  if (SCM_UNLIKELY (err)) \
    /* NOTE(review): ERR is `mem_iconveh's return value, not an errno \
       code; confirm whether `errno' should be passed here instead.  */ \
    scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A", \
                      scm_list_1 (utf), err); \
  else \
    /* Build the string from the C_STRLEN converted bytes.  */ \
    str = scm_take_locale_stringn (c_str, c_strlen); \
 \
  return (str);
1894 SCM_DEFINE (scm_r6rs_utf8_to_string, "utf8->string",
1895 1, 0, 0,
1896 (SCM utf),
1897 "Return a newly allocate string that contains from the UTF-8-"
1898 "encoded contents of bytevector @var{utf}.")
1899 #define FUNC_NAME s_scm_r6rs_utf8_to_string
1901 SCM str;
1902 int err;
1903 char *c_str = NULL, *c_locale;
1904 const char *c_utf;
1905 size_t c_utf_len, c_strlen = 0;
1907 SCM_VALIDATE_R6RS_BYTEVECTOR (1, utf);
1909 c_utf_len = SCM_R6RS_BYTEVECTOR_LENGTH (utf);
1911 c_locale = (char *) alloca (strlen (locale_charset ()) + 1);
1912 strcpy (c_locale, locale_charset ());
1914 c_utf = (char *) SCM_R6RS_BYTEVECTOR_CONTENTS (utf);
1915 err = mem_iconveh (c_utf, c_utf_len,
1916 "UTF-8", c_locale,
1917 iconveh_question_mark, NULL,
1918 &c_str, &c_strlen);
1919 if (SCM_UNLIKELY (err))
1920 scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A",
1921 scm_list_1 (utf), err);
1922 else
1923 /* C_STR is null-terminated. */
1924 str = scm_take_locale_stringn (c_str, c_strlen);
1926 return (str);
1928 #undef FUNC_NAME
1930 SCM_DEFINE (scm_r6rs_utf16_to_string, "utf16->string",
1931 1, 1, 0,
1932 (SCM utf, SCM endianness),
1933 "Return a newly allocate string that contains from the UTF-17-"
1934 "encoded contents of bytevector @var{utf}.")
1935 #define FUNC_NAME s_scm_r6rs_utf16_to_string
1937 UTF_TO_STRING (16);
1939 #undef FUNC_NAME
1941 SCM_DEFINE (scm_r6rs_utf32_to_string, "utf32->string",
1942 1, 1, 0,
1943 (SCM utf, SCM endianness),
1944 "Return a newly allocate string that contains from the UTF-17-"
1945 "encoded contents of bytevector @var{utf}.")
1946 #define FUNC_NAME s_scm_r6rs_utf32_to_string
1948 UTF_TO_STRING (32);
1950 #undef FUNC_NAME
1954 /* Initialization. */
1956 void
1957 scm_init_r6rs_bytevector (void)
1959 #include "bytevector.x"
1961 #ifdef WORDS_BIGENDIAN
1962 native_endianness = scm_sym_big;
1963 #else
1964 native_endianness = scm_sym_little;
1965 #endif
1967 scm_r6rs_endianness_big = scm_sym_big;
1968 scm_r6rs_endianness_little = scm_sym_little;
1970 scm_r6rs_null_bytevector =
1971 scm_gc_protect_object (make_bytevector_from_buffer (0, NULL));