ports: Accept `#f' as a transcoder argument.
[guile-r6rs-libs.git] / src / bytevector.c
blobd260f50ee4ebf6fc83d0a9fe2831504331879ce1
1 /* Guile-R6RS-Libs --- Implementation of R6RS standard libraries.
2 Copyright (C) 2007, 2008, 2009 Ludovic Courtès <ludo@gnu.org>
4 Guile-R6RS-Libs is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 Guile-R6RS-Libs is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with Guile-R6RS-Libs; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
22 #include <libguile.h>
23 #include <gmp.h>
25 #include "bytevector.h"
26 #include "ieee-754.h"
27 #include "uniconv.h"
28 #include "striconveh.h"
29 #include "utils.h"
31 #include <byteswap.h>
33 #ifdef HAVE_LIMITS_H
34 # include <limits.h>
35 #else
36 /* Assuming 32-bit longs. */
37 # define ULONG_MAX 4294967295UL
38 #endif
40 #include <string.h>
44 /* Utilities. */
/* Convenience macros.  These are used by the various templates (macros)
   below, which are parameterized by integer width and signedness.  */

/* C type holding an integer of the given width and signedness.  The
   32-bit entries are needed by `INTEGER_REF (32, ...)' & co.; they are
   guarded in case a header already provides them.  */
#define INT8_T_signed           scm_t_int8
#define INT8_T_unsigned         scm_t_uint8
#define INT16_T_signed          scm_t_int16
#define INT16_T_unsigned        scm_t_uint16
#ifndef INT32_T_signed
# define INT32_T_signed         scm_t_int32
#endif
#ifndef INT32_T_unsigned
# define INT32_T_unsigned       scm_t_uint32
#endif

/* Range predicates: true when _x (a `long' for the signed variants, an
   `unsigned long' for the unsigned ones) fits in the given width.  */
#define is_signed_int8(_x)      (((_x) >= -128L) && ((_x) <= 127L))
#define is_unsigned_int8(_x)    ((_x) <= 255UL)
#define is_signed_int16(_x)     (((_x) >= -32768L) && ((_x) <= 32767L))
#define is_unsigned_int16(_x)   ((_x) <= 65535UL)
#ifndef is_signed_int32
# define is_signed_int32(_x)                                    \
  (((_x) >= (-2147483647L - 1L)) && ((_x) <= 2147483647L))
#endif
#ifndef is_unsigned_int32
# define is_unsigned_int32(_x)  ((_x) <= 4294967295UL)
#endif

/* Signedness as a C boolean.  */
#define SIGNEDNESS_signed       1
#define SIGNEDNESS_unsigned     0

/* Token-pasting selectors for the definitions above.  */
#define INT_TYPE(_size, _sign)          INT ## _size ## _T_ ## _sign
#define INT_SWAP(_size)                 bswap_ ## _size
#define INT_VALID_P(_size, _sign)       is_ ## _sign ## _int ## _size
#define SIGNEDNESS(_sign)               SIGNEDNESS_ ## _sign
/* Common prologue of the fixed-width accessors.  Expects `bv' and `index'
   (both SCM) and FUNC_NAME in scope; declares and sets C_LEN, C_INDEX and
   C_BV, and signals an out-of-range error unless the _LEN-bit integer at
   C_INDEX lies entirely within the bytevector.  */
#define INTEGER_ACCESSOR_PROLOGUE(_len, _sign)                          \
  unsigned c_len, c_index;                                              \
  _sign char *c_bv;                                                     \
                                                                        \
  SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv);                                 \
  c_index = scm_to_uint (index);                                        \
                                                                        \
  c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);                              \
  c_bv = (_sign char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv);              \
                                                                        \
  /* Compare against `c_len - width' rather than adding to C_INDEX,     \
     which would wrap around for indices close to UINT_MAX.  */         \
  if (SCM_UNLIKELY ((c_len < ((_len) >> 3U))                            \
                    || (c_index > c_len - ((_len) >> 3U))))             \
    scm_out_of_range (FUNC_NAME, index);
/* Function-body template: read the _LEN-bit (8, 16 or 32) integer of
   signedness _SIGN found at `index' in `bv', byte-swapping it when
   `endianness' is not the host's, and return it as a fixnum.  */
#define INTEGER_REF(_len, _sign)                                        \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
  SCM_VALIDATE_SYMBOL (3, endianness);                                  \
                                                                        \
  {                                                                     \
    INT_TYPE (_len, _sign) c_raw;                                       \
    int c_swap_p = !scm_is_eq (endianness, native_endianness);          \
                                                                        \
    /* Copy rather than dereference: C_INDEX may be unaligned.  */      \
    memcpy (&c_raw, c_bv + c_index, (_len) / 8);                        \
    if (c_swap_p)                                                       \
      c_raw = INT_SWAP (_len) (c_raw);                                  \
                                                                        \
    return SCM_I_MAKINUM (c_raw);                                       \
  }
/* Function-body template: like `INTEGER_REF' but always using the host
   byte order, hence no `endianness' argument and no swapping.  */
#define INTEGER_NATIVE_REF(_len, _sign)                                 \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
                                                                        \
  {                                                                     \
    INT_TYPE (_len, _sign) c_raw;                                       \
                                                                        \
    memcpy (&c_raw, c_bv + c_index, (_len) / 8);                        \
    return SCM_I_MAKINUM (c_raw);                                       \
  }
/* Function-body template: store `value', which must be a fixnum that fits
   in a _LEN-bit (8, 16 or 32) integer of signedness _SIGN, at `index' in
   `bv', byte-swapping it when `endianness' is not the host's.  */
#define INTEGER_SET(_len, _sign)                                        \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
  /* ENDIANNESS is the 4th argument of the `-set!' procedures (after    \
     BV, INDEX and VALUE), so report that position on type errors.  */  \
  SCM_VALIDATE_SYMBOL (4, endianness);                                  \
                                                                        \
  {                                                                     \
    _sign long c_value;                                                 \
    INT_TYPE (_len, _sign) c_value_short;                               \
                                                                        \
    if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                            \
      scm_wrong_type_arg (FUNC_NAME, 3, value);                         \
                                                                        \
    c_value = SCM_I_INUM (value);                                       \
    if (SCM_UNLIKELY (!INT_VALID_P (_len, _sign) (c_value)))            \
      scm_out_of_range (FUNC_NAME, value);                              \
                                                                        \
    c_value_short = (INT_TYPE (_len, _sign)) c_value;                   \
    if (!scm_is_eq (endianness, native_endianness))                     \
      c_value_short = INT_SWAP (_len) (c_value_short);                  \
                                                                        \
    memcpy (&c_bv[c_index], &c_value_short, (_len) / 8);                \
  }                                                                     \
                                                                        \
  return SCM_UNSPECIFIED;
/* Function-body template: like `INTEGER_SET' but always storing in the
   host byte order, hence no `endianness' argument and no swapping.  */
#define INTEGER_NATIVE_SET(_len, _sign)                                 \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
                                                                        \
  {                                                                     \
    _sign long c_wide;                                                  \
    INT_TYPE (_len, _sign) c_narrow;                                    \
                                                                        \
    if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                            \
      scm_wrong_type_arg (FUNC_NAME, 3, value);                         \
                                                                        \
    c_wide = SCM_I_INUM (value);                                        \
    if (SCM_UNLIKELY (!INT_VALID_P (_len, _sign) (c_wide)))             \
      scm_out_of_range (FUNC_NAME, value);                              \
                                                                        \
    c_narrow = (INT_TYPE (_len, _sign)) c_wide;                         \
    memcpy (c_bv + c_index, &c_narrow, (_len) / 8);                     \
  }                                                                     \
                                                                        \
  return SCM_UNSPECIFIED;
162 /* Bytevector type. */
164 SCM_GLOBAL_SMOB (scm_tc16_r6rs_bytevector, "r6rs-bytevector", 0);
166 #define SCM_R6RS_BYTEVECTOR_SET_LENGTH(_bv, _len) \
167 SCM_SET_SMOB_DATA ((_bv), (scm_t_bits) (_len))
168 #define SCM_R6RS_BYTEVECTOR_SET_CONTENTS(_bv, _buf) \
169 SCM_SET_SMOB_DATA_2 ((_bv), (scm_t_bits) (_buf))
171 /* The empty bytevector. */
172 SCM scm_r6rs_null_bytevector = SCM_UNSPECIFIED;
175 static inline SCM
176 make_bytevector_from_buffer (unsigned len, signed char *contents)
178 /* Assuming LEN > SCM_R6RS_BYTEVECTOR_INLINE_THRESHOLD. */
179 SCM_RETURN_NEWSMOB2 (scm_tc16_r6rs_bytevector, len, contents);
182 static inline SCM
183 make_bytevector (unsigned len)
185 SCM bv;
187 if (SCM_UNLIKELY (len == 0))
188 bv = scm_r6rs_null_bytevector;
189 else
191 signed char *contents = NULL;
193 if (!SCM_R6RS_BYTEVECTOR_INLINEABLE_SIZE_P (len))
194 contents = (signed char *) scm_gc_malloc (len, SCM_GC_BYTEVECTOR);
196 bv = make_bytevector_from_buffer (len, contents);
199 return bv;
202 /* Return a new bytevector of size LEN octets. */
204 scm_r6rs_c_make_bytevector (unsigned len)
206 return (make_bytevector (len));
209 /* Return a bytevector of size LEN made up of CONTENTS. The area pointed to
210 by CONTENTS must have been allocated using `scm_gc_malloc ()'. */
212 scm_r6rs_c_take_bytevector (signed char *contents, unsigned len)
214 SCM bv;
216 if (SCM_UNLIKELY (SCM_R6RS_BYTEVECTOR_INLINEABLE_SIZE_P (len)))
218 /* Copy CONTENTS into an "in-line" buffer, then free CONTENTS. */
219 signed char *c_bv;
221 bv = make_bytevector (len);
222 c_bv = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
223 memcpy (c_bv, contents, len);
224 scm_gc_free (contents, len, SCM_GC_BYTEVECTOR);
226 else
227 bv = make_bytevector_from_buffer (len, contents);
229 return bv;
232 /* Shrink BV to C_NEW_LEN (which is assumed to be smaller than its current
233 size) and return BV. */
235 scm_r6rs_i_shrink_bytevector (SCM bv, unsigned c_new_len)
237 if (!SCM_R6RS_BYTEVECTOR_INLINE_P (bv))
239 unsigned c_len;
240 signed char *c_bv, *c_new_bv;
242 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);
243 c_bv = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
245 SCM_R6RS_BYTEVECTOR_SET_LENGTH (bv, c_new_len);
247 if (SCM_R6RS_BYTEVECTOR_INLINEABLE_SIZE_P (c_new_len))
249 /* Copy to the in-line buffer and free the current buffer. */
250 c_new_bv = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
251 memcpy (c_new_bv, c_bv, c_new_len);
252 scm_gc_free (c_bv, c_len, SCM_GC_BYTEVECTOR);
254 else
256 /* Resize the existing buffer. */
257 c_new_bv = scm_gc_realloc (c_bv, c_len, c_new_len,
258 SCM_GC_BYTEVECTOR);
259 SCM_R6RS_BYTEVECTOR_SET_CONTENTS (bv, c_new_bv);
263 return bv;
266 SCM_SMOB_PRINT (scm_tc16_r6rs_bytevector, print_bytevector,
267 bv, port, pstate)
269 unsigned c_len, i;
270 unsigned char *c_bv;
272 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);
273 c_bv = (unsigned char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
275 scm_puts ("#vu8(", port);
276 for (i = 0; i < c_len; i++)
278 if (i > 0)
279 scm_putc (' ', port);
281 scm_uintprint (c_bv[i], 10, port);
284 scm_putc (')', port);
286 /* Make GCC think we use it. */
287 scm_remember_upto_here ((SCM) pstate);
289 return 1;
292 SCM_SMOB_FREE (scm_tc16_r6rs_bytevector, free_bytevector, bv)
295 if (!SCM_R6RS_BYTEVECTOR_INLINE_P (bv))
297 unsigned c_len;
298 signed char *c_bv;
300 c_bv = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
301 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);
303 scm_gc_free (c_bv, c_len, SCM_GC_BYTEVECTOR);
306 return 0;
311 /* General operations. */
313 SCM_SYMBOL (scm_sym_big, "big");
314 SCM_SYMBOL (scm_sym_little, "little");
316 SCM scm_r6rs_endianness_big, scm_r6rs_endianness_little;
318 /* Host endianness (a symbol). */
319 static SCM native_endianness = SCM_UNSPECIFIED;
/* Byte-swapping.  `bswap_24' reverses the three low-order octets of _X:
   the outer two trade places and the middle one stays put.  */
#ifndef bswap_24
# define bswap_24(_x)                           \
  ((((_x) & 0x0000ff) << 16)                    \
   | ((_x) & 0x00ff00)                          \
   | (((_x) & 0xff0000) >> 16))
#endif
330 SCM_DEFINE (scm_r6rs_native_endianness, "native-endianness", 0, 0, 0,
331 (void),
332 "Return a symbol denoting the machine's native endianness.")
334 return native_endianness;
337 SCM_DEFINE (scm_r6rs_bytevector_p, "bytevector?", 1, 0, 0,
338 (SCM obj),
339 "Return true if @var{obj} is a bytevector.")
341 return (scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_r6rs_bytevector,
342 obj)));
345 SCM_DEFINE (scm_r6rs_make_bytevector, "make-bytevector", 1, 1, 0,
346 (SCM len, SCM fill),
347 "Return a newly allocated bytevector of @var{len} bytes, "
348 "optionally filled with @var{fill}.")
349 #define FUNC_NAME s_scm_r6rs_make_bytevector
351 SCM bv;
352 unsigned c_len;
353 signed char c_fill = '\0';
355 SCM_VALIDATE_UINT_COPY (1, len, c_len);
356 if (fill != SCM_UNDEFINED)
358 int value;
360 value = scm_to_int (fill);
361 if (SCM_UNLIKELY ((value < -128) || (value > 255)))
362 scm_out_of_range (FUNC_NAME, fill);
363 c_fill = (signed char) value;
366 bv = make_bytevector (c_len);
367 if (fill != SCM_UNDEFINED)
369 unsigned i;
370 signed char *contents;
372 contents = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
373 for (i = 0; i < c_len; i++)
374 contents[i] = c_fill;
377 return bv;
379 #undef FUNC_NAME
381 SCM_DEFINE (scm_r6rs_bytevector_length, "bytevector-length", 1, 0, 0,
382 (SCM bv),
383 "Return the length (in bytes) of @var{bv}.")
384 #define FUNC_NAME s_scm_r6rs_bytevector_length
386 SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv);
388 return (scm_from_uint (SCM_R6RS_BYTEVECTOR_LENGTH (bv)));
390 #undef FUNC_NAME
392 SCM_DEFINE (scm_r6rs_bytevector_eq_p, "bytevector=?", 2, 0, 0,
393 (SCM bv1, SCM bv2),
394 "Return is @var{bv1} equals to @var{bv2}---i.e., if they "
395 "have the same length and contents.")
396 #define FUNC_NAME s_scm_r6rs_bytevector_eq_p
398 SCM result = SCM_BOOL_F;
399 unsigned c_len1, c_len2;
401 SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv1);
402 SCM_VALIDATE_R6RS_BYTEVECTOR (2, bv2);
404 c_len1 = SCM_R6RS_BYTEVECTOR_LENGTH (bv1);
405 c_len2 = SCM_R6RS_BYTEVECTOR_LENGTH (bv2);
407 if (c_len1 == c_len2)
409 signed char *c_bv1, *c_bv2;
411 c_bv1 = SCM_R6RS_BYTEVECTOR_CONTENTS (bv1);
412 c_bv2 = SCM_R6RS_BYTEVECTOR_CONTENTS (bv2);
414 result = scm_from_bool (!memcmp (c_bv1, c_bv2, c_len1));
417 return result;
419 #undef FUNC_NAME
421 SCM_DEFINE (scm_r6rs_bytevector_fill_x, "bytevector-fill!", 2, 0, 0,
422 (SCM bv, SCM fill),
423 "Fill bytevector @var{bv} with @var{fill}, a byte.")
424 #define FUNC_NAME s_scm_r6rs_bytevector_fill_x
426 unsigned c_len, i;
427 signed char *c_bv, c_fill;
429 SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv);
430 c_fill = scm_to_int8 (fill);
432 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);
433 c_bv = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
435 for (i = 0; i < c_len; i++)
436 c_bv[i] = c_fill;
438 return SCM_UNSPECIFIED;
440 #undef FUNC_NAME
442 SCM_DEFINE (scm_r6rs_bytevector_copy_x, "bytevector-copy!", 5, 0, 0,
443 (SCM source, SCM source_start, SCM target, SCM target_start,
444 SCM len),
445 "Copy @var{len} bytes from @var{source} into @var{target}, "
446 "starting reading from @var{source_start} (a positive index "
447 "within @var{source}) and start writing at "
448 "@var{target_start}.")
449 #define FUNC_NAME s_scm_r6rs_bytevector_copy_x
451 unsigned c_len, c_source_len, c_target_len;
452 unsigned c_source_start, c_target_start;
453 signed char *c_source, *c_target;
455 SCM_VALIDATE_R6RS_BYTEVECTOR (1, source);
456 SCM_VALIDATE_R6RS_BYTEVECTOR (3, target);
458 c_len = scm_to_uint (len);
459 c_source_start = scm_to_uint (source_start);
460 c_target_start = scm_to_uint (target_start);
462 c_source = SCM_R6RS_BYTEVECTOR_CONTENTS (source);
463 c_target = SCM_R6RS_BYTEVECTOR_CONTENTS (target);
464 c_source_len = SCM_R6RS_BYTEVECTOR_LENGTH (source);
465 c_target_len = SCM_R6RS_BYTEVECTOR_LENGTH (target);
467 if (SCM_UNLIKELY (c_source_start + c_len > c_source_len))
468 scm_out_of_range (FUNC_NAME, source_start);
469 if (SCM_UNLIKELY (c_target_start + c_len > c_target_len))
470 scm_out_of_range (FUNC_NAME, target_start);
472 memcpy (c_target + c_target_start,
473 c_source + c_source_start,
474 c_len);
476 return SCM_UNSPECIFIED;
478 #undef FUNC_NAME
480 SCM_DEFINE (scm_r6rs_bytevector_copy, "bytevector-copy", 1, 0, 0,
481 (SCM bv),
482 "Return a newly allocated copy of @var{bv}.")
483 #define FUNC_NAME s_scm_r6rs_bytevector_copy
485 SCM copy;
486 unsigned c_len;
487 signed char *c_bv, *c_copy;
489 SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv);
491 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);
492 c_bv = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
494 copy = make_bytevector (c_len);
495 c_copy = SCM_R6RS_BYTEVECTOR_CONTENTS (copy);
496 memcpy (c_copy, c_bv, c_len);
498 return copy;
500 #undef FUNC_NAME
503 /* Operations on bytes and octets. */
505 SCM_DEFINE (scm_r6rs_bytevector_u8_ref, "bytevector-u8-ref", 2, 0, 0,
506 (SCM bv, SCM index),
507 "Return the octet located at @var{index} in @var{bv}.")
508 #define FUNC_NAME s_scm_r6rs_bytevector_u8_ref
510 INTEGER_NATIVE_REF (8, unsigned);
512 #undef FUNC_NAME
514 SCM_DEFINE (scm_r6rs_bytevector_s8_ref, "bytevector-s8-ref", 2, 0, 0,
515 (SCM bv, SCM index),
516 "Return the byte located at @var{index} in @var{bv}.")
517 #define FUNC_NAME s_scm_r6rs_bytevector_u8_ref
519 INTEGER_NATIVE_REF (8, signed);
521 #undef FUNC_NAME
523 SCM_DEFINE (scm_r6rs_bytevector_u8_set_x, "bytevector-u8-set!", 3, 0, 0,
524 (SCM bv, SCM index, SCM value),
525 "Return the octet located at @var{index} in @var{bv}.")
526 #define FUNC_NAME s_scm_r6rs_bytevector_u8_set_x
528 INTEGER_NATIVE_SET (8, unsigned);
530 #undef FUNC_NAME
532 SCM_DEFINE (scm_r6rs_bytevector_s8_set_x, "bytevector-s8-set!", 3, 0, 0,
533 (SCM bv, SCM index, SCM value),
534 "Return the octet located at @var{index} in @var{bv}.")
535 #define FUNC_NAME s_scm_r6rs_bytevector_u8_set_x
537 INTEGER_NATIVE_SET (8, signed);
539 #undef FUNC_NAME
541 #undef OCTET_ACCESSOR_PROLOGUE
544 SCM_DEFINE (scm_r6rs_bytevector_to_u8_list, "bytevector->u8-list", 1, 0, 0,
545 (SCM bv),
546 "Return a newly allocated list of octets containing the "
547 "contents of @var{bv}.")
548 #define FUNC_NAME s_scm_r6rs_bytevector_to_u8_list
550 SCM lst, pair;
551 unsigned c_len, i;
552 unsigned char *c_bv;
554 SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv);
556 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);
557 c_bv = (unsigned char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
559 lst = scm_make_list (scm_from_uint (c_len), SCM_UNSPECIFIED);
560 for (i = 0, pair = lst;
561 i < c_len;
562 i++, pair = SCM_CDR (pair))
564 SCM_SETCAR (pair, SCM_I_MAKINUM (c_bv[i]));
567 return lst;
569 #undef FUNC_NAME
571 SCM_DEFINE (scm_r6rs_u8_list_to_bytevector, "u8-list->bytevector", 1, 0, 0,
572 (SCM lst),
573 "Turn @var{lst}, a list of octets, into a bytevector.")
574 #define FUNC_NAME s_scm_r6rs_u8_list_to_bytevector
576 SCM bv, item;
577 long c_len, i;
578 unsigned char *c_bv;
580 SCM_VALIDATE_LIST_COPYLEN (1, lst, c_len);
582 bv = make_bytevector (c_len);
583 c_bv = (unsigned char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
585 for (i = 0; i < c_len; lst = SCM_CDR (lst), i++)
587 item = SCM_CAR (lst);
589 if (SCM_LIKELY (SCM_I_INUMP (item)))
591 long c_item;
593 c_item = SCM_I_INUM (item);
594 if (SCM_LIKELY ((c_item >= 0) && (c_item < 256)))
595 c_bv[i] = (unsigned char) c_item;
596 else
597 goto type_error;
599 else
600 goto type_error;
603 return bv;
605 type_error:
606 scm_wrong_type_arg (FUNC_NAME, 1, item);
608 return SCM_BOOL_F;
610 #undef FUNC_NAME
612 /* Compute the two's complement of VALUE (a positive integer) on SIZE octets
613 using (2^(SIZE * 8) - VALUE). */
614 static inline void
615 twos_complement (mpz_t value, size_t size)
617 unsigned long bit_count;
619 /* We expect BIT_COUNT to fit in a unsigned long thanks to the range
620 checking on SIZE performed earlier. */
621 bit_count = (unsigned long) size << 3UL;
623 if (SCM_LIKELY (bit_count < sizeof (unsigned long)))
624 mpz_ui_sub (value, 1UL << bit_count, value);
625 else
627 mpz_t max;
629 mpz_init (max);
630 mpz_ui_pow_ui (max, 2, bit_count);
631 mpz_sub (value, max, value);
632 mpz_clear (max);
636 static inline SCM
637 bytevector_large_ref (const char *c_bv, size_t c_size, int signed_p,
638 SCM endianness)
640 SCM result;
641 mpz_t c_mpz;
642 int c_endianness, negative_p = 0;
644 if (signed_p)
646 if (scm_is_eq (endianness, scm_sym_big))
647 negative_p = c_bv[0] & 0x80;
648 else
649 negative_p = c_bv[c_size - 1] & 0x80;
652 c_endianness = scm_is_eq (endianness, scm_sym_big) ? 1 : -1;
654 mpz_init (c_mpz);
655 mpz_import (c_mpz, 1 /* 1 word */, 1 /* word order doesn't matter */,
656 c_size /* word is C_SIZE-byte long */,
657 c_endianness,
658 0 /* nails */, c_bv);
660 if (signed_p && negative_p)
662 twos_complement (c_mpz, c_size);
663 mpz_neg (c_mpz, c_mpz);
666 result = scm_from_mpz (c_mpz);
667 mpz_clear (c_mpz); /* FIXME: Needed? */
669 return result;
672 static inline int
673 bytevector_large_set (char *c_bv, size_t c_size, int signed_p,
674 SCM value, SCM endianness)
676 mpz_t c_mpz;
677 int c_endianness, c_sign, err = 0;
679 c_endianness = scm_is_eq (endianness, scm_sym_big) ? 1 : -1;
681 mpz_init (c_mpz);
682 scm_to_mpz (value, c_mpz);
684 c_sign = mpz_sgn (c_mpz);
685 if (c_sign < 0)
687 if (SCM_LIKELY (signed_p))
689 mpz_neg (c_mpz, c_mpz);
690 twos_complement (c_mpz, c_size);
692 else
694 err = -1;
695 goto finish;
699 if (c_sign == 0)
700 /* Zero. */
701 memset (c_bv, 0, c_size);
702 else
704 size_t word_count, value_size;
706 value_size = (mpz_sizeinbase (c_mpz, 2) + (8 * c_size)) / (8 * c_size);
707 if (SCM_UNLIKELY (value_size > c_size))
709 err = -2;
710 goto finish;
714 mpz_export (c_bv, &word_count, 1 /* word order doesn't matter */,
715 c_size, c_endianness,
716 0 /* nails */, c_mpz);
717 if (SCM_UNLIKELY (word_count != 1))
718 /* Shouldn't happen since we already checked with VALUE_SIZE. */
719 abort ();
722 finish:
723 mpz_clear (c_mpz);
725 return err;
/* Common prologue of the arbitrary-size accessors.  Expects `bv', `index'
   and `size' (all SCM) and FUNC_NAME in scope; declares and sets C_LEN,
   C_INDEX, C_SIZE and C_BV, and signals an out-of-range error unless
   C_SIZE is valid and the C_SIZE octets at C_INDEX lie within the
   bytevector.  _SIGN is unused.  */
#define GENERIC_INTEGER_ACCESSOR_PROLOGUE(_sign)                        \
  unsigned c_len, c_index, c_size;                                      \
  char *c_bv;                                                           \
                                                                        \
  SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv);                                 \
  c_index = scm_to_uint (index);                                        \
  c_size = scm_to_uint (size);                                          \
                                                                        \
  c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);                              \
  c_bv = (char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv);                    \
                                                                        \
  /* C_SIZE must have its 3 higher bits set to zero so that             \
     multiplying it by 8 yields a number that fits in an                \
     unsigned long.  */                                                 \
  if (SCM_UNLIKELY ((c_size == 0) || (c_size >= (ULONG_MAX >> 3L))))    \
    scm_out_of_range (FUNC_NAME, size);                                 \
  /* Written as a subtraction: `c_index + c_size' may wrap around.  */  \
  if (SCM_UNLIKELY ((c_size > c_len) || (c_index > c_len - c_size)))    \
    scm_out_of_range (FUNC_NAME, index);
748 /* Template of an integer reference function. */
749 #define GENERIC_INTEGER_REF(_sign) \
750 SCM result; \
752 if (c_size < 3) \
754 int swap; \
755 _sign int value; \
757 swap = !scm_is_eq (endianness, native_endianness); \
758 switch (c_size) \
760 case 1: \
762 _sign char c_value8; \
763 memcpy (&c_value8, c_bv, 1); \
764 value = c_value8; \
766 break; \
767 case 2: \
769 INT_TYPE (16, _sign) c_value16; \
770 memcpy (&c_value16, c_bv, 2); \
771 if (swap) \
772 value = (INT_TYPE (16, _sign)) bswap_16 (c_value16); \
773 else \
774 value = c_value16; \
776 break; \
777 default: \
778 abort (); \
781 result = SCM_I_MAKINUM ((_sign int) value); \
783 else \
784 result = bytevector_large_ref ((char *) c_bv, \
785 c_size, SIGNEDNESS (_sign), \
786 endianness); \
788 return result;
790 static inline SCM
791 bytevector_signed_ref (const char *c_bv, size_t c_size, SCM endianness)
793 GENERIC_INTEGER_REF (signed);
796 static inline SCM
797 bytevector_unsigned_ref (const char *c_bv, size_t c_size, SCM endianness)
799 GENERIC_INTEGER_REF (unsigned);
803 /* Template of an integer assignment function. */
804 #define GENERIC_INTEGER_SET(_sign) \
805 if (c_size < 3) \
807 _sign int c_value; \
809 if (SCM_UNLIKELY (!SCM_I_INUMP (value))) \
810 goto range_error; \
812 c_value = SCM_I_INUM (value); \
813 switch (c_size) \
815 case 1: \
816 if (SCM_LIKELY (INT_VALID_P (8, _sign) (c_value))) \
818 _sign char c_value8; \
819 c_value8 = (_sign char) c_value; \
820 memcpy (c_bv, &c_value8, 1); \
822 else \
823 goto range_error; \
824 break; \
826 case 2: \
827 if (SCM_LIKELY (INT_VALID_P (16, _sign) (c_value))) \
829 int swap; \
830 INT_TYPE (16, _sign) c_value16; \
832 swap = !scm_is_eq (endianness, native_endianness); \
834 c_value16 = \
835 swap ? bswap_16 (c_value) : c_value; \
836 memcpy (c_bv, &c_value16, 2); \
838 else \
839 goto range_error; \
840 break; \
842 default: \
843 abort (); \
846 else \
848 int err; \
850 err = bytevector_large_set (c_bv, c_size, \
851 SIGNEDNESS (_sign), \
852 value, endianness); \
853 if (err) \
854 goto range_error; \
857 return; \
859 range_error: \
860 scm_out_of_range (FUNC_NAME, value); \
861 return;
863 static inline void
864 bytevector_signed_set (char *c_bv, size_t c_size,
865 SCM value, SCM endianness,
866 const char *func_name)
867 #define FUNC_NAME func_name
869 GENERIC_INTEGER_SET (signed);
871 #undef FUNC_NAME
873 static inline void
874 bytevector_unsigned_set (char *c_bv, size_t c_size,
875 SCM value, SCM endianness,
876 const char *func_name)
877 #define FUNC_NAME func_name
879 GENERIC_INTEGER_SET (unsigned);
881 #undef FUNC_NAME
883 #undef GENERIC_INTEGER_SET
884 #undef GENERIC_INTEGER_REF
887 SCM_DEFINE (scm_r6rs_bytevector_uint_ref, "bytevector-uint-ref", 4, 0, 0,
888 (SCM bv, SCM index, SCM endianness, SCM size),
889 "Return the @var{size}-octet long unsigned integer at index "
890 "@var{index} in @var{bv}.")
891 #define FUNC_NAME s_scm_r6rs_bytevector_uint_ref
893 GENERIC_INTEGER_ACCESSOR_PROLOGUE (unsigned);
895 return (bytevector_unsigned_ref (&c_bv[c_index], c_size, endianness));
897 #undef FUNC_NAME
899 SCM_DEFINE (scm_r6rs_bytevector_sint_ref, "bytevector-sint-ref", 4, 0, 0,
900 (SCM bv, SCM index, SCM endianness, SCM size),
901 "Return the @var{size}-octet long unsigned integer at index "
902 "@var{index} in @var{bv}.")
903 #define FUNC_NAME s_scm_r6rs_bytevector_sint_ref
905 GENERIC_INTEGER_ACCESSOR_PROLOGUE (signed);
907 return (bytevector_signed_ref (&c_bv[c_index], c_size, endianness));
909 #undef FUNC_NAME
911 SCM_DEFINE (scm_r6rs_bytevector_uint_set_x, "bytevector-uint-set!", 5, 0, 0,
912 (SCM bv, SCM index, SCM value, SCM endianness, SCM size),
913 "Set the @var{size}-octet long unsigned integer at @var{index} "
914 "to @var{value}.")
915 #define FUNC_NAME s_scm_r6rs_bytevector_uint_set_x
917 GENERIC_INTEGER_ACCESSOR_PROLOGUE (unsigned);
919 bytevector_unsigned_set (&c_bv[c_index], c_size, value, endianness,
920 FUNC_NAME);
922 return SCM_UNSPECIFIED;
924 #undef FUNC_NAME
926 SCM_DEFINE (scm_r6rs_bytevector_sint_set_x, "bytevector-sint-set!", 5, 0, 0,
927 (SCM bv, SCM index, SCM value, SCM endianness, SCM size),
928 "Set the @var{size}-octet long signed integer at @var{index} "
929 "to @var{value}.")
930 #define FUNC_NAME s_scm_r6rs_bytevector_sint_set_x
932 GENERIC_INTEGER_ACCESSOR_PROLOGUE (signed);
934 bytevector_signed_set (&c_bv[c_index], c_size, value, endianness,
935 FUNC_NAME);
937 return SCM_UNSPECIFIED;
939 #undef FUNC_NAME
/* Operations on integers of arbitrary size.  */

/* Function-body template: return the list of `size'-octet integers of
   signedness _SIGN found in `bv', decoded according to `endianness'.
   Trailing octets that do not form a whole integer are ignored.  An
   out-of-range error is signalled on `size' when it is zero, too large,
   or larger than a non-empty `bv'.  */
#define INTEGERS_TO_LIST(_sign)                                         \
  SCM lst, pair;                                                        \
  size_t i, c_len, c_size;                                              \
                                                                        \
  SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv);                                 \
  SCM_VALIDATE_SYMBOL (2, endianness);                                  \
  c_size = scm_to_uint (size);                                          \
                                                                        \
  /* Same validity test as the other arbitrary-size operations; in      \
     particular a zero size would divide by zero below.  */             \
  if (SCM_UNLIKELY ((c_size == 0) || (c_size >= (ULONG_MAX >> 3L))))    \
    scm_out_of_range (FUNC_NAME, size);                                 \
                                                                        \
  c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);                              \
  if (SCM_UNLIKELY (c_len == 0))                                        \
    lst = SCM_EOL;                                                      \
  else if (SCM_UNLIKELY (c_len < c_size))                               \
    scm_out_of_range (FUNC_NAME, size);                                 \
  else                                                                  \
    {                                                                   \
      const char *c_bv;                                                 \
                                                                        \
      c_bv = (char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv);                \
                                                                        \
      lst = scm_make_list (scm_from_uint (c_len / c_size),              \
                           SCM_UNSPECIFIED);                            \
      for (i = 0, pair = lst;                                           \
           i <= c_len - c_size;                                         \
           i += c_size, c_bv += c_size, pair = SCM_CDR (pair))          \
        {                                                               \
          SCM_SETCAR (pair,                                             \
                      bytevector_ ## _sign ## _ref (c_bv, c_size,       \
                                                    endianness));       \
        }                                                               \
    }                                                                   \
                                                                        \
  return lst;
978 SCM_DEFINE (scm_r6rs_bytevector_to_sint_list, "bytevector->sint-list",
979 3, 0, 0,
980 (SCM bv, SCM endianness, SCM size),
981 "Return a list of signed integers of @var{size} octets "
982 "representing the contents of @var{bv}.")
983 #define FUNC_NAME s_scm_r6rs_bytevector_to_sint_list
985 INTEGERS_TO_LIST (signed);
987 #undef FUNC_NAME
989 SCM_DEFINE (scm_r6rs_bytevector_to_uint_list, "bytevector->uint-list",
990 3, 0, 0,
991 (SCM bv, SCM endianness, SCM size),
992 "Return a list of unsigned integers of @var{size} octets "
993 "representing the contents of @var{bv}.")
994 #define FUNC_NAME s_scm_r6rs_bytevector_to_uint_list
996 INTEGERS_TO_LIST (unsigned);
998 #undef FUNC_NAME
1000 #undef INTEGER_TO_LIST
/* Function-body template: return a new bytevector holding the integers of
   `lst', each encoded on `size' octets with signedness _SIGN according to
   `endianness'.  An out-of-range error is signalled on `size' when it is
   zero, too large, or when the total length would not fit in the
   `unsigned' length taken by `make_bytevector ()'; element range errors
   are reported by the per-element setter.  */
#define INTEGER_LIST_TO_BYTEVECTOR(_sign)                               \
  SCM bv;                                                               \
  long c_len;                                                           \
  size_t c_size;                                                        \
  char *c_bv, *c_bv_ptr;                                                \
                                                                        \
  SCM_VALIDATE_LIST_COPYLEN (1, lst, c_len);                            \
  SCM_VALIDATE_SYMBOL (2, endianness);                                  \
  c_size = scm_to_uint (size);                                          \
                                                                        \
  if (SCM_UNLIKELY ((c_size == 0) || (c_size >= (ULONG_MAX >> 3L))))    \
    scm_out_of_range (FUNC_NAME, size);                                 \
  /* Reject products that would be truncated when converted to the      \
     `unsigned' parameter below, yielding an undersized buffer.  */     \
  if (SCM_UNLIKELY ((c_len > 0)                                         \
                    && (c_size > ((unsigned) -1) / (size_t) c_len)))    \
    scm_out_of_range (FUNC_NAME, size);                                 \
                                                                        \
  bv = make_bytevector (c_len * c_size);                                \
  c_bv = (char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv);                    \
                                                                        \
  for (c_bv_ptr = c_bv;                                                 \
       !scm_is_null (lst);                                              \
       lst = SCM_CDR (lst), c_bv_ptr += c_size)                         \
    {                                                                   \
      bytevector_ ## _sign ## _set (c_bv_ptr, c_size,                   \
                                    SCM_CAR (lst), endianness,          \
                                    FUNC_NAME);                         \
    }                                                                   \
                                                                        \
  return bv;
1031 SCM_DEFINE (scm_r6rs_uint_list_to_bytevector, "uint-list->bytevector",
1032 3, 0, 0,
1033 (SCM lst, SCM endianness, SCM size),
1034 "Return a bytevector containing the unsigned integers "
1035 "listed in @var{lst} and encoded on @var{size} octets "
1036 "according to @var{endianness}.")
1037 #define FUNC_NAME s_scm_r6rs_uint_list_to_bytevector
1039 INTEGER_LIST_TO_BYTEVECTOR (unsigned);
1041 #undef FUNC_NAME
1043 SCM_DEFINE (scm_r6rs_sint_list_to_bytevector, "sint-list->bytevector",
1044 3, 0, 0,
1045 (SCM lst, SCM endianness, SCM size),
1046 "Return a bytevector containing the signed integers "
1047 "listed in @var{lst} and encoded on @var{size} octets "
1048 "according to @var{endianness}.")
1049 #define FUNC_NAME s_scm_r6rs_sint_list_to_bytevector
1051 INTEGER_LIST_TO_BYTEVECTOR (signed);
1053 #undef FUNC_NAME
1055 #undef INTEGER_LIST_TO_BYTEVECTOR
1059 /* Operations on 16-bit integers. */
1061 SCM_DEFINE (scm_r6rs_bytevector_u16_ref, "bytevector-u16-ref",
1062 3, 0, 0,
1063 (SCM bv, SCM index, SCM endianness),
1064 "Return the unsigned 16-bit integer from @var{bv} at "
1065 "@var{index}.")
1066 #define FUNC_NAME s_scm_r6rs_bytevector_u16_ref
1068 INTEGER_REF (16, unsigned);
1070 #undef FUNC_NAME
1072 SCM_DEFINE (scm_r6rs_bytevector_s16_ref, "bytevector-s16-ref",
1073 3, 0, 0,
1074 (SCM bv, SCM index, SCM endianness),
1075 "Return the signed 16-bit integer from @var{bv} at "
1076 "@var{index}.")
1077 #define FUNC_NAME s_scm_r6rs_bytevector_s16_ref
1079 INTEGER_REF (16, signed);
1081 #undef FUNC_NAME
1083 SCM_DEFINE (scm_r6rs_bytevector_u16_native_ref, "bytevector-u16-native-ref",
1084 2, 0, 0,
1085 (SCM bv, SCM index),
1086 "Return the unsigned 16-bit integer from @var{bv} at "
1087 "@var{index} using the native endianness.")
1088 #define FUNC_NAME s_scm_r6rs_bytevector_u16_native_ref
1090 INTEGER_NATIVE_REF (16, unsigned);
1092 #undef FUNC_NAME
1094 SCM_DEFINE (scm_r6rs_bytevector_s16_native_ref, "bytevector-s16-native-ref",
1095 2, 0, 0,
1096 (SCM bv, SCM index),
1097 "Return the unsigned 16-bit integer from @var{bv} at "
1098 "@var{index} using the native endianness.")
1099 #define FUNC_NAME s_scm_r6rs_bytevector_u16_native_ref
1101 INTEGER_NATIVE_REF (16, signed);
1103 #undef FUNC_NAME
1105 SCM_DEFINE (scm_r6rs_bytevector_u16_set_x, "bytevector-u16-set!",
1106 4, 0, 0,
1107 (SCM bv, SCM index, SCM value, SCM endianness),
1108 "Store @var{value} in @var{bv} at @var{index} according to "
1109 "@var{endianness}.")
1110 #define FUNC_NAME s_scm_r6rs_bytevector_u16_set_x
1112 INTEGER_SET (16, unsigned);
1114 #undef FUNC_NAME
1116 SCM_DEFINE (scm_r6rs_bytevector_s16_set_x, "bytevector-s16-set!",
1117 4, 0, 0,
1118 (SCM bv, SCM index, SCM value, SCM endianness),
1119 "Store @var{value} in @var{bv} at @var{index} according to "
1120 "@var{endianness}.")
1121 #define FUNC_NAME s_scm_r6rs_bytevector_s16_set_x
1123 INTEGER_SET (16, signed);
1125 #undef FUNC_NAME
1127 SCM_DEFINE (scm_r6rs_bytevector_u16_native_set_x, "bytevector-u16-native-set!",
1128 3, 0, 0,
1129 (SCM bv, SCM index, SCM value),
1130 "Store the unsigned integer @var{value} at index @var{index} "
1131 "of @var{bv} using the native endianness.")
1132 #define FUNC_NAME s_scm_r6rs_bytevector_u16_native_set_x
1134 INTEGER_NATIVE_SET (16, unsigned);
1136 #undef FUNC_NAME
1138 SCM_DEFINE (scm_r6rs_bytevector_s16_native_set_x, "bytevector-s16-native-set!",
1139 3, 0, 0,
1140 (SCM bv, SCM index, SCM value),
1141 "Store the signed integer @var{value} at index @var{index} "
1142 "of @var{bv} using the native endianness.")
1143 #define FUNC_NAME s_scm_r6rs_bytevector_u16_native_set_x
1145 INTEGER_NATIVE_SET (16, signed);
1147 #undef FUNC_NAME
/* Operations on 32-bit integers.  */

/* Unfortunately, on 32-bit machines `SCM' is not large enough to hold
   arbitrary 32-bit integers.  Thus we fall back to using the
   `large_{ref,set}' variants on 32-bit machines.  The four templates
   below mirror `INTEGER_REF' & co. but go through
   `bytevector_large_ref ()' / `bytevector_large_set ()'.  */

/* Read with an explicit `endianness' argument.  */
#define LARGE_INTEGER_REF(_len, _sign)                                  \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
  SCM_VALIDATE_SYMBOL (3, endianness);                                  \
                                                                        \
  return bytevector_large_ref ((char *) c_bv + c_index, (_len) / 8,     \
                               SIGNEDNESS (_sign), endianness);

/* Store with an explicit `endianness' argument; any failure is reported
   as an out-of-range error on `value'.  */
#define LARGE_INTEGER_SET(_len, _sign)                                  \
  int err;                                                              \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
  SCM_VALIDATE_SYMBOL (4, endianness);                                  \
                                                                        \
  err = bytevector_large_set ((char *) c_bv + c_index, (_len) / 8,      \
                              SIGNEDNESS (_sign), value, endianness);   \
  if (SCM_UNLIKELY (err != 0))                                          \
    scm_out_of_range (FUNC_NAME, value);                                \
                                                                        \
  return SCM_UNSPECIFIED;

/* Read in host byte order.  */
#define LARGE_INTEGER_NATIVE_REF(_len, _sign)                           \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
                                                                        \
  return bytevector_large_ref ((char *) c_bv + c_index, (_len) / 8,     \
                               SIGNEDNESS (_sign), native_endianness);

/* Store in host byte order; any failure is reported as an out-of-range
   error on `value'.  */
#define LARGE_INTEGER_NATIVE_SET(_len, _sign)                           \
  int err;                                                              \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
                                                                        \
  err = bytevector_large_set ((char *) c_bv + c_index, (_len) / 8,      \
                              SIGNEDNESS (_sign), value,                \
                              native_endianness);                       \
  if (SCM_UNLIKELY (err != 0))                                          \
    scm_out_of_range (FUNC_NAME, value);                                \
                                                                        \
  return SCM_UNSPECIFIED;
1194 SCM_DEFINE (scm_r6rs_bytevector_u32_ref, "bytevector-u32-ref",
1195 3, 0, 0,
1196 (SCM bv, SCM index, SCM endianness),
1197 "Return the unsigned 32-bit integer from @var{bv} at "
1198 "@var{index}.")
1199 #define FUNC_NAME s_scm_r6rs_bytevector_u32_ref
1201 #if SIZEOF_VOID_P > 4
1202 INTEGER_REF (32, unsigned);
1203 #else
1204 LARGE_INTEGER_REF (32, unsigned);
1205 #endif
1207 #undef FUNC_NAME
1209 SCM_DEFINE (scm_r6rs_bytevector_s32_ref, "bytevector-s32-ref",
1210 3, 0, 0,
1211 (SCM bv, SCM index, SCM endianness),
1212 "Return the signed 32-bit integer from @var{bv} at "
1213 "@var{index}.")
1214 #define FUNC_NAME s_scm_r6rs_bytevector_s32_ref
1216 #if SIZEOF_VOID_P > 4
1217 INTEGER_REF (32, signed);
1218 #else
1219 LARGE_INTEGER_REF (32, signed);
1220 #endif
1222 #undef FUNC_NAME
1224 SCM_DEFINE (scm_r6rs_bytevector_u32_native_ref, "bytevector-u32-native-ref",
1225 2, 0, 0,
1226 (SCM bv, SCM index),
1227 "Return the unsigned 32-bit integer from @var{bv} at "
1228 "@var{index} using the native endianness.")
1229 #define FUNC_NAME s_scm_r6rs_bytevector_u32_native_ref
1231 #if SIZEOF_VOID_P > 4
1232 INTEGER_NATIVE_REF (32, unsigned);
1233 #else
1234 LARGE_INTEGER_NATIVE_REF (32, unsigned);
1235 #endif
1237 #undef FUNC_NAME
1239 SCM_DEFINE (scm_r6rs_bytevector_s32_native_ref, "bytevector-s32-native-ref",
1240 2, 0, 0,
1241 (SCM bv, SCM index),
1242 "Return the unsigned 32-bit integer from @var{bv} at "
1243 "@var{index} using the native endianness.")
1244 #define FUNC_NAME s_scm_r6rs_bytevector_u32_native_ref
1246 #if SIZEOF_VOID_P > 4
1247 INTEGER_NATIVE_REF (32, signed);
1248 #else
1249 LARGE_INTEGER_NATIVE_REF (32, signed);
1250 #endif
1252 #undef FUNC_NAME
1254 SCM_DEFINE (scm_r6rs_bytevector_u32_set_x, "bytevector-u32-set!",
1255 4, 0, 0,
1256 (SCM bv, SCM index, SCM value, SCM endianness),
1257 "Store @var{value} in @var{bv} at @var{index} according to "
1258 "@var{endianness}.")
1259 #define FUNC_NAME s_scm_r6rs_bytevector_u32_set_x
1261 #if SIZEOF_VOID_P > 4
1262 INTEGER_SET (32, unsigned);
1263 #else
1264 LARGE_INTEGER_SET (32, unsigned);
1265 #endif
1267 #undef FUNC_NAME
1269 SCM_DEFINE (scm_r6rs_bytevector_s32_set_x, "bytevector-s32-set!",
1270 4, 0, 0,
1271 (SCM bv, SCM index, SCM value, SCM endianness),
1272 "Store @var{value} in @var{bv} at @var{index} according to "
1273 "@var{endianness}.")
1274 #define FUNC_NAME s_scm_r6rs_bytevector_s32_set_x
1276 #if SIZEOF_VOID_P > 4
1277 INTEGER_SET (32, signed);
1278 #else
1279 LARGE_INTEGER_SET (32, signed);
1280 #endif
1282 #undef FUNC_NAME
1284 SCM_DEFINE (scm_r6rs_bytevector_u32_native_set_x, "bytevector-u32-native-set!",
1285 3, 0, 0,
1286 (SCM bv, SCM index, SCM value),
1287 "Store the unsigned integer @var{value} at index @var{index} "
1288 "of @var{bv} using the native endianness.")
1289 #define FUNC_NAME s_scm_r6rs_bytevector_u32_native_set_x
1291 #if SIZEOF_VOID_P > 4
1292 INTEGER_NATIVE_SET (32, unsigned);
1293 #else
1294 LARGE_INTEGER_NATIVE_SET (32, unsigned);
1295 #endif
1297 #undef FUNC_NAME
1299 SCM_DEFINE (scm_r6rs_bytevector_s32_native_set_x, "bytevector-s32-native-set!",
1300 3, 0, 0,
1301 (SCM bv, SCM index, SCM value),
1302 "Store the signed integer @var{value} at index @var{index} "
1303 "of @var{bv} using the native endianness.")
1304 #define FUNC_NAME s_scm_r6rs_bytevector_u32_native_set_x
1306 #if SIZEOF_VOID_P > 4
1307 INTEGER_NATIVE_SET (32, signed);
1308 #else
1309 LARGE_INTEGER_NATIVE_SET (32, signed);
1310 #endif
1312 #undef FUNC_NAME
1316 /* Operations on 64-bit integers. */
1318 /* For 64-bit integers, we use only the `large_{ref,set}' variant. */
1320 SCM_DEFINE (scm_r6rs_bytevector_u64_ref, "bytevector-u64-ref",
1321 3, 0, 0,
1322 (SCM bv, SCM index, SCM endianness),
1323 "Return the unsigned 64-bit integer from @var{bv} at "
1324 "@var{index}.")
1325 #define FUNC_NAME s_scm_r6rs_bytevector_u64_ref
1327 LARGE_INTEGER_REF (64, unsigned);
1329 #undef FUNC_NAME
1331 SCM_DEFINE (scm_r6rs_bytevector_s64_ref, "bytevector-s64-ref",
1332 3, 0, 0,
1333 (SCM bv, SCM index, SCM endianness),
1334 "Return the signed 64-bit integer from @var{bv} at "
1335 "@var{index}.")
1336 #define FUNC_NAME s_scm_r6rs_bytevector_s64_ref
1338 LARGE_INTEGER_REF (64, signed);
1340 #undef FUNC_NAME
1342 SCM_DEFINE (scm_r6rs_bytevector_u64_native_ref, "bytevector-u64-native-ref",
1343 2, 0, 0,
1344 (SCM bv, SCM index),
1345 "Return the unsigned 64-bit integer from @var{bv} at "
1346 "@var{index} using the native endianness.")
1347 #define FUNC_NAME s_scm_r6rs_bytevector_u64_native_ref
1349 LARGE_INTEGER_NATIVE_REF (64, unsigned);
1351 #undef FUNC_NAME
1353 SCM_DEFINE (scm_r6rs_bytevector_s64_native_ref, "bytevector-s64-native-ref",
1354 2, 0, 0,
1355 (SCM bv, SCM index),
1356 "Return the unsigned 64-bit integer from @var{bv} at "
1357 "@var{index} using the native endianness.")
1358 #define FUNC_NAME s_scm_r6rs_bytevector_u64_native_ref
1360 LARGE_INTEGER_NATIVE_REF (64, signed);
1362 #undef FUNC_NAME
1364 SCM_DEFINE (scm_r6rs_bytevector_u64_set_x, "bytevector-u64-set!",
1365 4, 0, 0,
1366 (SCM bv, SCM index, SCM value, SCM endianness),
1367 "Store @var{value} in @var{bv} at @var{index} according to "
1368 "@var{endianness}.")
1369 #define FUNC_NAME s_scm_r6rs_bytevector_u64_set_x
1371 LARGE_INTEGER_SET (64, unsigned);
1373 #undef FUNC_NAME
1375 SCM_DEFINE (scm_r6rs_bytevector_s64_set_x, "bytevector-s64-set!",
1376 4, 0, 0,
1377 (SCM bv, SCM index, SCM value, SCM endianness),
1378 "Store @var{value} in @var{bv} at @var{index} according to "
1379 "@var{endianness}.")
1380 #define FUNC_NAME s_scm_r6rs_bytevector_s64_set_x
1382 LARGE_INTEGER_SET (64, signed);
1384 #undef FUNC_NAME
1386 SCM_DEFINE (scm_r6rs_bytevector_u64_native_set_x, "bytevector-u64-native-set!",
1387 3, 0, 0,
1388 (SCM bv, SCM index, SCM value),
1389 "Store the unsigned integer @var{value} at index @var{index} "
1390 "of @var{bv} using the native endianness.")
1391 #define FUNC_NAME s_scm_r6rs_bytevector_u64_native_set_x
1393 LARGE_INTEGER_NATIVE_SET (64, unsigned);
1395 #undef FUNC_NAME
1397 SCM_DEFINE (scm_r6rs_bytevector_s64_native_set_x, "bytevector-s64-native-set!",
1398 3, 0, 0,
1399 (SCM bv, SCM index, SCM value),
1400 "Store the signed integer @var{value} at index @var{index} "
1401 "of @var{bv} using the native endianness.")
1402 #define FUNC_NAME s_scm_r6rs_bytevector_u64_native_set_x
1404 LARGE_INTEGER_NATIVE_SET (64, signed);
1406 #undef FUNC_NAME
1410 /* Operations on IEEE-754 numbers. */
1412 /* There are two possible word endians, visible in glibc's <ieee754.h>.
1413 However, in R6RS, when the endianness is `little', little endian is
1414 assumed for both the byte order and the word order. This is clear from
1415 Section 2.1 of R6RS-lib (in response to
1416 http://www.r6rs.org/formal-comments/comment-187.txt). */
1419 /* Convert to/from a floating-point number with different endianness. This
1420 method is probably not the most efficient but it should be portable. */
1422 static inline void
1423 float_to_foreign_endianness (union scm_r6rs_ieee754_float *target,
1424 float source)
1426 union scm_r6rs_ieee754_float src;
1428 src.f = source;
1430 #ifdef WORDS_BIGENDIAN
1431 /* Assuming little endian for both byte and word order. */
1432 target->little_endian.negative = src.big_endian.negative;
1433 target->little_endian.exponent = src.big_endian.exponent;
1434 target->little_endian.mantissa = src.big_endian.mantissa;
1435 #else
1436 target->big_endian.negative = src.little_endian.negative;
1437 target->big_endian.exponent = src.little_endian.exponent;
1438 target->big_endian.mantissa = src.little_endian.mantissa;
1439 #endif
1442 static inline float
1443 float_from_foreign_endianness (const union scm_r6rs_ieee754_float *source)
1445 union scm_r6rs_ieee754_float result;
1447 #ifdef WORDS_BIGENDIAN
1448 /* Assuming little endian for both byte and word order. */
1449 result.big_endian.negative = source->little_endian.negative;
1450 result.big_endian.exponent = source->little_endian.exponent;
1451 result.big_endian.mantissa = source->little_endian.mantissa;
1452 #else
1453 result.little_endian.negative = source->big_endian.negative;
1454 result.little_endian.exponent = source->big_endian.exponent;
1455 result.little_endian.mantissa = source->big_endian.mantissa;
1456 #endif
1458 return (result.f);
1461 static inline void
1462 double_to_foreign_endianness (union scm_r6rs_ieee754_double *target,
1463 double source)
1465 union scm_r6rs_ieee754_double src;
1467 src.d = source;
1469 #ifdef WORDS_BIGENDIAN
1470 /* Assuming little endian for both byte and word order. */
1471 target->little_little_endian.negative = src.big_endian.negative;
1472 target->little_little_endian.exponent = src.big_endian.exponent;
1473 target->little_little_endian.mantissa0 = src.big_endian.mantissa0;
1474 target->little_little_endian.mantissa1 = src.big_endian.mantissa1;
1475 #else
1476 target->big_endian.negative = src.little_little_endian.negative;
1477 target->big_endian.exponent = src.little_little_endian.exponent;
1478 target->big_endian.mantissa0 = src.little_little_endian.mantissa0;
1479 target->big_endian.mantissa1 = src.little_little_endian.mantissa1;
1480 #endif
1483 static inline double
1484 double_from_foreign_endianness (const union scm_r6rs_ieee754_double *source)
1486 union scm_r6rs_ieee754_double result;
1488 #ifdef WORDS_BIGENDIAN
1489 /* Assuming little endian for both byte and word order. */
1490 result.big_endian.negative = source->little_little_endian.negative;
1491 result.big_endian.exponent = source->little_little_endian.exponent;
1492 result.big_endian.mantissa0 = source->little_little_endian.mantissa0;
1493 result.big_endian.mantissa1 = source->little_little_endian.mantissa1;
1494 #else
1495 result.little_little_endian.negative = source->big_endian.negative;
1496 result.little_little_endian.exponent = source->big_endian.exponent;
1497 result.little_little_endian.mantissa0 = source->big_endian.mantissa0;
1498 result.little_little_endian.mantissa1 = source->big_endian.mantissa1;
1499 #endif
1501 return (result.d);
/* Helpers that let the accessor templates below be written once for both
   `float' and `double'; _C_TYPE is the C type name.
   XXX: Guile can only convert to/from doubles, so the two SCM conversion
   macros ignore _C_TYPE.  */
#define IEEE754_UNION(_c_type)     union scm_r6rs_ieee754_ ## _c_type
#define IEEE754_TO_SCM(_c_type)    scm_from_double
#define IEEE754_FROM_SCM(_c_type)  scm_to_double
#define IEEE754_FROM_FOREIGN_ENDIANNESS(_c_type) _c_type ## _from_foreign_endianness
#define IEEE754_TO_FOREIGN_ENDIANNESS(_c_type)   _c_type ## _to_foreign_endianness
/* Template getters and setters.  */

/* Common prologue of the IEEE-754 accessors: reuse the integer accessor
   prologue with the width of _TYPE in bits.  The width expression is
   parenthesized so that it stays a single operand wherever
   INTEGER_ACCESSOR_PROLOGUE substitutes it (e.g. in a `_len / 8'
   expression, where an unparenthesized `<<' would bind more loosely than
   the division).  */
#define IEEE754_ACCESSOR_PROLOGUE(_type)                        \
  INTEGER_ACCESSOR_PROLOGUE ((sizeof (_type) << 3UL), signed);
/* Body template for `bytevector-ieee-*-ref'.  ENDIANNESS (argument 3)
   must be a symbol.  When it is not the native endianness, the raw bytes
   are read into the matching IEEE-754 union and converted with the
   `*_from_foreign_endianness' helper; otherwise they are copied straight
   into a _TYPE.  */
#define IEEE754_REF(_type)                                              \
  _type c_number;                                                       \
                                                                        \
  IEEE754_ACCESSOR_PROLOGUE (_type);                                    \
  SCM_VALIDATE_SYMBOL (3, endianness);                                  \
                                                                        \
  if (!scm_is_eq (endianness, native_endianness))                       \
    {                                                                   \
      IEEE754_UNION (_type) c_foreign;                                  \
                                                                        \
      memcpy (&c_foreign, c_bv + c_index, sizeof (c_foreign));          \
      c_number = IEEE754_FROM_FOREIGN_ENDIANNESS (_type) (&c_foreign);  \
    }                                                                   \
  else                                                                  \
    memcpy (&c_number, c_bv + c_index, sizeof (c_number));              \
                                                                        \
  return IEEE754_TO_SCM (_type) (c_number);
/* Body template for `bytevector-ieee-*-native-ref': copy sizeof (_TYPE)
   bytes from C_BV at C_INDEX into a _TYPE and convert it to a Scheme
   number.  */
#define IEEE754_NATIVE_REF(_type)                                       \
  _type c_number;                                                       \
                                                                        \
  IEEE754_ACCESSOR_PROLOGUE (_type);                                    \
                                                                        \
  memcpy (&c_number, c_bv + c_index, sizeof (c_number));                \
  return IEEE754_TO_SCM (_type) (c_number);
/* Body template for `bytevector-ieee-*-set!'.  VALUE (argument 3) must be
   a real and ENDIANNESS (argument 4) a symbol.  When ENDIANNESS is not
   the native one, VALUE is first converted with the
   `*_to_foreign_endianness' helper; the resulting bytes are then copied
   into C_BV at C_INDEX.  */
#define IEEE754_SET(_type)                                              \
  _type c_number;                                                       \
                                                                        \
  IEEE754_ACCESSOR_PROLOGUE (_type);                                    \
  SCM_VALIDATE_REAL (3, value);                                         \
  SCM_VALIDATE_SYMBOL (4, endianness);                                  \
  c_number = IEEE754_FROM_SCM (_type) (value);                          \
                                                                        \
  if (!scm_is_eq (endianness, native_endianness))                       \
    {                                                                   \
      IEEE754_UNION (_type) c_foreign;                                  \
                                                                        \
      IEEE754_TO_FOREIGN_ENDIANNESS (_type) (&c_foreign, c_number);     \
      memcpy (c_bv + c_index, &c_foreign, sizeof (c_foreign));          \
    }                                                                   \
  else                                                                  \
    memcpy (c_bv + c_index, &c_number, sizeof (c_number));              \
                                                                        \
  return SCM_UNSPECIFIED;
/* Body template for `bytevector-ieee-*-native-set!': VALUE (argument 3)
   must be a real; it is converted to _TYPE and its bytes are copied into
   C_BV at C_INDEX unchanged.  */
#define IEEE754_NATIVE_SET(_type)                                       \
  _type c_number;                                                       \
                                                                        \
  IEEE754_ACCESSOR_PROLOGUE (_type);                                    \
  SCM_VALIDATE_REAL (3, value);                                         \
  c_number = IEEE754_FROM_SCM (_type) (value);                          \
                                                                        \
  memcpy (c_bv + c_index, &c_number, sizeof (c_number));                \
  return SCM_UNSPECIFIED;
1578 /* Single precision. */
1580 SCM_DEFINE (scm_r6rs_bytevector_ieee_single_ref,
1581 "bytevector-ieee-single-ref",
1582 3, 0, 0,
1583 (SCM bv, SCM index, SCM endianness),
1584 "Return the IEEE-754 single from @var{bv} at "
1585 "@var{index}.")
1586 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_single_ref
1588 IEEE754_REF (float);
1590 #undef FUNC_NAME
1592 SCM_DEFINE (scm_r6rs_bytevector_ieee_single_native_ref,
1593 "bytevector-ieee-single-native-ref",
1594 2, 0, 0,
1595 (SCM bv, SCM index),
1596 "Return the IEEE-754 single from @var{bv} at "
1597 "@var{index} using the native endianness.")
1598 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_single_native_ref
1600 IEEE754_NATIVE_REF (float);
1602 #undef FUNC_NAME
1604 SCM_DEFINE (scm_r6rs_bytevector_ieee_single_set_x,
1605 "bytevector-ieee-single-set!",
1606 4, 0, 0,
1607 (SCM bv, SCM index, SCM value, SCM endianness),
1608 "Store real @var{value} in @var{bv} at @var{index} according to "
1609 "@var{endianness}.")
1610 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_single_set_x
1612 IEEE754_SET (float);
1614 #undef FUNC_NAME
1616 SCM_DEFINE (scm_r6rs_bytevector_ieee_single_native_set_x,
1617 "bytevector-ieee-single-native-set!",
1618 3, 0, 0,
1619 (SCM bv, SCM index, SCM value),
1620 "Store the real @var{value} at index @var{index} "
1621 "of @var{bv} using the native endianness.")
1622 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_single_native_set_x
1624 IEEE754_NATIVE_SET (float);
1626 #undef FUNC_NAME
1629 /* Double precision. */
1631 SCM_DEFINE (scm_r6rs_bytevector_ieee_double_ref,
1632 "bytevector-ieee-double-ref",
1633 3, 0, 0,
1634 (SCM bv, SCM index, SCM endianness),
1635 "Return the IEEE-754 double from @var{bv} at "
1636 "@var{index}.")
1637 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_double_ref
1639 IEEE754_REF (double);
1641 #undef FUNC_NAME
1643 SCM_DEFINE (scm_r6rs_bytevector_ieee_double_native_ref,
1644 "bytevector-ieee-double-native-ref",
1645 2, 0, 0,
1646 (SCM bv, SCM index),
1647 "Return the IEEE-754 double from @var{bv} at "
1648 "@var{index} using the native endianness.")
1649 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_double_native_ref
1651 IEEE754_NATIVE_REF (double);
1653 #undef FUNC_NAME
1655 SCM_DEFINE (scm_r6rs_bytevector_ieee_double_set_x,
1656 "bytevector-ieee-double-set!",
1657 4, 0, 0,
1658 (SCM bv, SCM index, SCM value, SCM endianness),
1659 "Store real @var{value} in @var{bv} at @var{index} according to "
1660 "@var{endianness}.")
1661 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_double_set_x
1663 IEEE754_SET (double);
1665 #undef FUNC_NAME
1667 SCM_DEFINE (scm_r6rs_bytevector_ieee_double_native_set_x,
1668 "bytevector-ieee-double-native-set!",
1669 3, 0, 0,
1670 (SCM bv, SCM index, SCM value),
1671 "Store the real @var{value} at index @var{index} "
1672 "of @var{bv} using the native endianness.")
1673 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_double_native_set_x
1675 IEEE754_NATIVE_SET (double);
1677 #undef FUNC_NAME
/* The IEEE-754 template macros are only meant for the accessors above;
   drop all of them, including the shared prologue.  */
#undef IEEE754_UNION
#undef IEEE754_TO_SCM
#undef IEEE754_FROM_SCM
#undef IEEE754_FROM_FOREIGN_ENDIANNESS
#undef IEEE754_TO_FOREIGN_ENDIANNESS
#undef IEEE754_ACCESSOR_PROLOGUE
#undef IEEE754_REF
#undef IEEE754_NATIVE_REF
#undef IEEE754_SET
#undef IEEE754_NATIVE_SET
1691 /* Operations on strings. */
/* Define `utf<WIDTH>_strlen', which scans STR up to (not including) its
   first zero code unit and returns the size of the scanned part in
   bytes, i.e. the number of code units times WIDTH / 8.  */
#define UTF_STRLEN_FUNCTION(_utf_width)                                 \
static inline size_t                                                    \
utf ## _utf_width ## _strlen (const uint ## _utf_width ## _t *str)      \
{                                                                       \
  const uint ## _utf_width ## _t *end = str;                            \
                                                                        \
  while (*end != 0)                                                     \
    end++;                                                              \
                                                                        \
  return ((size_t) (end - str)) * ((_utf_width) / 8);                   \
}

UTF_STRLEN_FUNCTION (8)


/* Return the length (in bytes) of STR, a UTF-(UTF_WIDTH) encoded string,
   by dispatching to the function generated by UTF_STRLEN_FUNCTION.  */
#define UTF_STRLEN(_utf_width, _str)            \
  utf ## _utf_width ## _strlen (_str)
1718 /* Return the "portable" name of the UTF encoding of size UTF_WIDTH and
1719 ENDIANNESS (Gnulib's `iconv_open' module guarantees the portability of the
1720 encoding name). */
1721 static inline void
1722 utf_encoding_name (char *name, size_t utf_width, SCM endianness)
1724 strcpy (name, "UTF-");
1725 strcat (name, ((utf_width == 8)
1726 ? "8"
1727 : ((utf_width == 16)
1728 ? "16"
1729 : ((utf_width == 32)
1730 ? "32"
1731 : "??"))));
1732 strcat (name,
1733 ((scm_is_eq (endianness, scm_sym_big))
1734 ? "BE"
1735 : ((scm_is_eq (endianness, scm_sym_little))
1736 ? "LE"
1737 : "unknown")));
1740 /* Maximum length of a UTF encoding name. */
1741 #define MAX_UTF_ENCODING_NAME_LEN 16
/* Produce the body of a `string->utf' function for UTF-(_UTF_WIDTH).

   STR (argument 1) must be a string.  ENDIANNESS (argument 2) defaults to
   `big' when unbound and must otherwise be a symbol.  STR is first
   converted to the locale encoding, then re-encoded with `mem_iconveh';
   the resulting buffer is handed to `scm_r6rs_c_take_bytevector'.  A
   conversion failure raises a system error.  */
#define STRING_TO_UTF(_utf_width)                                       \
  SCM utf = SCM_BOOL_F;                                                 \
  int err;                                                              \
  char *c_str;                                                          \
  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                           \
  char *c_utf = NULL, *c_locale;                                        \
  size_t c_strlen, c_raw_strlen, c_utf_len = 0;                         \
                                                                        \
  SCM_VALIDATE_STRING (1, str);                                         \
  if (scm_is_eq (endianness, SCM_UNDEFINED))                            \
    endianness = scm_sym_big;                                           \
  else                                                                  \
    SCM_VALIDATE_SYMBOL (2, endianness);                                \
                                                                        \
  /* C_STRLEN is the capacity offered to `scm_to_locale_stringbuf',    \
     which returns the number of bytes needed.  When that exceeds the  \
     capacity, retry with a buffer of exactly the needed size so the   \
     loop is guaranteed to make progress.  */                           \
  c_strlen = scm_c_string_length (str);                                 \
  for (;;)                                                              \
    {                                                                   \
      c_str = (char *) alloca (c_strlen + 1);                           \
      c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_strlen);    \
      if (c_raw_strlen <= c_strlen)                                     \
        break;                                                          \
      c_strlen = c_raw_strlen;                                          \
    }                                                                   \
  c_str[c_raw_strlen] = '\0';                                           \
                                                                        \
  utf_encoding_name (c_utf_name, (_utf_width), endianness);             \
                                                                        \
  c_locale = (char *) alloca (strlen (locale_charset ()) + 1);          \
  strcpy (c_locale, locale_charset ());                                 \
                                                                        \
  err = mem_iconveh (c_str, c_raw_strlen,                               \
                     c_locale, c_utf_name,                              \
                     iconveh_question_mark, NULL,                       \
                     &c_utf, &c_utf_len);                               \
  /* NOTE(review): ERR is the status returned by `mem_iconveh', not    \
     necessarily an errno value -- confirm what `scm_syserror_msg'     \
     should be given here.  */                                          \
  if (SCM_UNLIKELY (err))                                               \
    scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A",        \
                      scm_list_1 (str), err);                           \
  else                                                                  \
    /* The length is passed explicitly, so C_UTF need not be           \
       null-terminated.  */                                             \
    utf = scm_r6rs_c_take_bytevector ((signed char *) c_utf,            \
                                      c_utf_len);                       \
                                                                        \
  return (utf);
1789 SCM_DEFINE (scm_r6rs_string_to_utf8, "string->utf8",
1790 1, 0, 0,
1791 (SCM str),
1792 "Return a newly allocated bytevector that contains the UTF-8 "
1793 "encoding of @var{str}.")
1794 #define FUNC_NAME s_scm_r6rs_string_to_utf8
1796 SCM utf;
1797 char *c_str;
1798 uint8_t *c_utf;
1799 size_t c_strlen, c_raw_strlen;
1801 SCM_VALIDATE_STRING (1, str);
1803 c_strlen = scm_c_string_length (str);
1804 c_raw_strlen = c_strlen;
1807 c_str = (char *) alloca (c_raw_strlen + 1);
1808 c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_strlen);
1810 while (c_raw_strlen > c_strlen);
1811 c_str[c_raw_strlen] = '\0';
1813 c_utf = u8_strconv_from_locale (c_str);
1814 if (SCM_UNLIKELY (c_utf == NULL))
1815 scm_syserror (FUNC_NAME);
1816 else
1817 /* C_UTF is null-terminated. */
1818 utf = scm_r6rs_c_take_bytevector ((signed char *) c_utf,
1819 UTF_STRLEN (8, c_utf));
1821 return (utf);
1823 #undef FUNC_NAME
1825 SCM_DEFINE (scm_r6rs_string_to_utf16, "string->utf16",
1826 1, 1, 0,
1827 (SCM str, SCM endianness),
1828 "Return a newly allocated bytevector that contains the UTF-16 "
1829 "encoding of @var{str}.")
1830 #define FUNC_NAME s_scm_r6rs_string_to_utf16
1832 STRING_TO_UTF (16);
1834 #undef FUNC_NAME
1836 SCM_DEFINE (scm_r6rs_string_to_utf32, "string->utf32",
1837 1, 1, 0,
1838 (SCM str, SCM endianness),
1839 "Return a newly allocated bytevector that contains the UTF-32 "
1840 "encoding of @var{str}.")
1841 #define FUNC_NAME s_scm_r6rs_string_to_utf32
1843 STRING_TO_UTF (32);
1845 #undef FUNC_NAME
/* Produce the body of a function that converts a UTF-(_UTF_WIDTH)-encoded
   bytevector to a string.

   UTF (argument 1) must be a bytevector.  ENDIANNESS (argument 2)
   defaults to `big' when unbound and must otherwise be a symbol.  The
   bytevector contents are converted to the locale encoding with
   `mem_iconveh' and the resulting buffer is handed to
   `scm_take_locale_stringn'.  A conversion failure raises a system
   error.  */
#define UTF_TO_STRING(_utf_width)                                       \
  SCM str = SCM_BOOL_F;                                                 \
  int err;                                                              \
  char *c_str = NULL, *c_locale;                                        \
  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                           \
  const char *c_utf;                                                    \
  size_t c_strlen = 0, c_utf_len;                                       \
                                                                        \
  SCM_VALIDATE_R6RS_BYTEVECTOR (1, utf);                                \
  if (scm_is_eq (endianness, SCM_UNDEFINED))                            \
    endianness = scm_sym_big;                                           \
  else                                                                  \
    SCM_VALIDATE_SYMBOL (2, endianness);                                \
                                                                        \
  c_utf_len = SCM_R6RS_BYTEVECTOR_LENGTH (utf);                         \
  c_utf = (char *) SCM_R6RS_BYTEVECTOR_CONTENTS (utf);                  \
  utf_encoding_name (c_utf_name, (_utf_width), endianness);             \
                                                                        \
  c_locale = (char *) alloca (strlen (locale_charset ()) + 1);          \
  strcpy (c_locale, locale_charset ());                                 \
                                                                        \
  err = mem_iconveh (c_utf, c_utf_len,                                  \
                     c_utf_name, c_locale,                              \
                     iconveh_question_mark, NULL,                       \
                     &c_str, &c_strlen);                                \
  /* NOTE(review): ERR is the status returned by `mem_iconveh', not    \
     necessarily an errno value -- confirm what `scm_syserror_msg'     \
     should be given here.  */                                          \
  if (SCM_UNLIKELY (err))                                               \
    scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A",     \
                      scm_list_1 (utf), err);                           \
  else                                                                  \
    /* The length is passed explicitly, so C_STR need not be           \
       null-terminated.  */                                             \
    str = scm_take_locale_stringn (c_str, c_strlen);                    \
                                                                        \
  return (str);
1885 SCM_DEFINE (scm_r6rs_utf8_to_string, "utf8->string",
1886 1, 0, 0,
1887 (SCM utf),
1888 "Return a newly allocate string that contains from the UTF-8-"
1889 "encoded contents of bytevector @var{utf}.")
1890 #define FUNC_NAME s_scm_r6rs_utf8_to_string
1892 SCM str;
1893 int err;
1894 char *c_str = NULL, *c_locale;
1895 const char *c_utf;
1896 size_t c_utf_len, c_strlen = 0;
1898 SCM_VALIDATE_R6RS_BYTEVECTOR (1, utf);
1900 c_utf_len = SCM_R6RS_BYTEVECTOR_LENGTH (utf);
1902 c_locale = (char *) alloca (strlen (locale_charset ()) + 1);
1903 strcpy (c_locale, locale_charset ());
1905 c_utf = (char *) SCM_R6RS_BYTEVECTOR_CONTENTS (utf);
1906 err = mem_iconveh (c_utf, c_utf_len,
1907 "UTF-8", c_locale,
1908 iconveh_question_mark, NULL,
1909 &c_str, &c_strlen);
1910 if (SCM_UNLIKELY (err))
1911 scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A",
1912 scm_list_1 (utf), err);
1913 else
1914 /* C_STR is null-terminated. */
1915 str = scm_take_locale_stringn (c_str, c_strlen);
1917 return (str);
1919 #undef FUNC_NAME
1921 SCM_DEFINE (scm_r6rs_utf16_to_string, "utf16->string",
1922 1, 1, 0,
1923 (SCM utf, SCM endianness),
1924 "Return a newly allocate string that contains from the UTF-17-"
1925 "encoded contents of bytevector @var{utf}.")
1926 #define FUNC_NAME s_scm_r6rs_utf16_to_string
1928 UTF_TO_STRING (16);
1930 #undef FUNC_NAME
1932 SCM_DEFINE (scm_r6rs_utf32_to_string, "utf32->string",
1933 1, 1, 0,
1934 (SCM utf, SCM endianness),
1935 "Return a newly allocate string that contains from the UTF-17-"
1936 "encoded contents of bytevector @var{utf}.")
1937 #define FUNC_NAME s_scm_r6rs_utf32_to_string
1939 UTF_TO_STRING (32);
1941 #undef FUNC_NAME
1945 /* Initialization. */
1947 void
1948 scm_init_r6rs_bytevector (void)
1950 #include "bytevector.x"
1952 #ifdef WORDS_BIGENDIAN
1953 native_endianness = scm_sym_big;
1954 #else
1955 native_endianness = scm_sym_little;
1956 #endif
1958 scm_r6rs_endianness_big = scm_sym_big;
1959 scm_r6rs_endianness_little = scm_sym_little;
1961 scm_r6rs_null_bytevector =
1962 scm_gc_protect_object (make_bytevector_from_buffer (0, NULL));