bytevector: Add support for `utfXX->string'.
[guile-r6rs-libs.git] / src / bytevector.c
bloba5df71fd5ad45d8035e91860bfc0c39ec87ed854
1 /* Guile-R6RS-Libs --- Implementation of R6RS standard libraries.
2 Copyright (C) 2007 Ludovic Courtès <ludovic.courtes@laas.fr>
4 Guile-R6RS-Libs is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 Guile-R6RS-Libs is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with Guile-R6RS-Libs; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
18 #include "config.h"
20 #include <libguile.h>
21 #include <gmp.h>
23 #include "bytevector.h"
24 #include "ieee-754.h"
25 #include "uniconv.h"
26 #include "utils.h"
28 #include <byteswap.h>
30 #ifdef HAVE_LIMITS_H
31 # include <limits.h>
32 #else
33 /* Assuming 32-bit longs. */
34 # define ULONG_MAX 4294967295UL
35 #endif
37 #include <string.h>
41 /* Utilities. */
43 /* Convenience macros. These are used by the various templates (macros) that
44 are parameterized by integer signedness. */
45 #define INT8_T_signed scm_t_int8
46 #define INT8_T_unsigned scm_t_uint8
47 #define INT16_T_signed scm_t_int16
48 #define INT16_T_unsigned scm_t_uint16
49 #define is_signed_int8(_x) (((_x) >= -128L) && ((_x) <= 127L))
50 #define is_unsigned_int8(_x) ((_x) <= 255UL)
51 #define is_signed_int16(_x) (((_x) >= -32768L) && ((_x) <= 32767L))
52 #define is_unsigned_int16(_x) ((_x) <= 65535UL)
53 #define SIGNEDNESS_signed 1
54 #define SIGNEDNESS_unsigned 0
56 #define INT_TYPE(_size, _sign) INT ## _size ## _T_ ## _sign
57 #define INT_SWAP(_size) bswap_ ## _size
58 #define INT_VALID_P(_size, _sign) is_ ## _sign ## _int ## _size
59 #define SIGNEDNESS(_sign) SIGNEDNESS_ ## _sign
62 #define INTEGER_ACCESSOR_PROLOGUE(_len, _sign) \
63 unsigned c_len, c_index; \
64 _sign char *c_bv; \
66 SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv); \
67 c_index = scm_to_uint (index); \
69 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv); \
70 c_bv = (_sign char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv); \
72 if (EXPECT_FALSE (c_index + ((_len) >> 3UL) - 1 >= c_len)) \
73 scm_out_of_range (FUNC_NAME, index);
75 /* Template for fixed-size integer access (only 8, 16 or 32-bit). */
76 #define INTEGER_REF(_len, _sign) \
77 SCM result; \
79 INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
80 SCM_VALIDATE_SYMBOL (3, endianness); \
82 { \
83 INT_TYPE (_len, _sign) c_result; \
85 memcpy (&c_result, &c_bv[c_index], (_len) / 8); \
86 if (!scm_is_eq (endianness, native_endianness)) \
87 c_result = INT_SWAP (_len) (c_result); \
89 result = SCM_I_MAKINUM (c_result); \
90 } \
92 return result;
94 /* Template for fixed-size integer access using the native endianness. */
95 #define INTEGER_NATIVE_REF(_len, _sign) \
96 SCM result; \
98 INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
101 INT_TYPE (_len, _sign) c_result; \
103 memcpy (&c_result, &c_bv[c_index], (_len) / 8); \
104 result = SCM_I_MAKINUM (c_result); \
107 return result;
109 /* Template for fixed-size integer modification (only 8, 16 or 32-bit). */
110 #define INTEGER_SET(_len, _sign) \
111 INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
112 SCM_VALIDATE_SYMBOL (3, endianness); \
115 _sign long c_value; \
116 INT_TYPE (_len, _sign) c_value_short; \
118 if (EXPECT_FALSE (!SCM_I_INUMP (value))) \
119 scm_wrong_type_arg (FUNC_NAME, 3, value); \
121 c_value = SCM_I_INUM (value); \
122 if (EXPECT_FALSE (!INT_VALID_P (_len, _sign) (c_value))) \
123 scm_out_of_range (FUNC_NAME, value); \
125 c_value_short = (INT_TYPE (_len, _sign)) c_value; \
126 if (!scm_is_eq (endianness, native_endianness)) \
127 c_value_short = INT_SWAP (_len) (c_value_short); \
129 memcpy (&c_bv[c_index], &c_value_short, (_len) / 8); \
132 return SCM_UNSPECIFIED;
134 /* Template for fixed-size integer modification using the native
135 endianness. */
136 #define INTEGER_NATIVE_SET(_len, _sign) \
137 INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
140 _sign long c_value; \
141 INT_TYPE (_len, _sign) c_value_short; \
143 if (EXPECT_FALSE (!SCM_I_INUMP (value))) \
144 scm_wrong_type_arg (FUNC_NAME, 3, value); \
146 c_value = SCM_I_INUM (value); \
147 if (EXPECT_FALSE (!INT_VALID_P (_len, _sign) (c_value))) \
148 scm_out_of_range (FUNC_NAME, value); \
150 c_value_short = (INT_TYPE (_len, _sign)) c_value; \
152 memcpy (&c_bv[c_index], &c_value_short, (_len) / 8); \
155 return SCM_UNSPECIFIED;
159 /* Bytevector type. */
161 SCM_GLOBAL_SMOB (scm_tc16_r6rs_bytevector, "r6rs-bytevector", 0);
163 #define SCM_R6RS_BYTEVECTOR_SET_LENGTH(_bv, _len) \
164 SCM_SET_SMOB_DATA ((_bv), (scm_t_bits) (_len))
165 #define SCM_R6RS_BYTEVECTOR_SET_CONTENTS(_bv, _buf) \
166 SCM_SET_SMOB_DATA_2 ((_bv), (scm_t_bits) (_buf))
168 /* The empty bytevector. */
169 SCM scm_r6rs_null_bytevector = SCM_UNSPECIFIED;
172 static inline SCM
173 make_bytevector_from_buffer (unsigned len, signed char *contents)
175 /* Assuming LEN > SCM_R6RS_BYTEVECTOR_INLINE_THRESHOLD. */
176 SCM_RETURN_NEWSMOB2 (scm_tc16_r6rs_bytevector, len, contents);
179 static inline SCM
180 make_bytevector (unsigned len)
182 SCM bv;
184 if (EXPECT_FALSE (len == 0))
185 bv = scm_r6rs_null_bytevector;
186 else
188 signed char *contents = NULL;
190 if (!SCM_R6RS_BYTEVECTOR_INLINEABLE_SIZE_P (len))
191 contents = (signed char *) scm_gc_malloc (len, SCM_GC_BYTEVECTOR);
193 bv = make_bytevector_from_buffer (len, contents);
196 return bv;
199 /* Return a new bytevector of size LEN octets. */
201 scm_r6rs_c_make_bytevector (unsigned len)
203 return (make_bytevector (len));
206 /* Return a bytevector of size LEN made up of CONTENTS. The area pointed to
207 by CONTENTS must have been allocated using `scm_gc_malloc ()'. */
209 scm_r6rs_c_take_bytevector (signed char *contents, unsigned len)
211 SCM bv;
213 if (EXPECT_FALSE (SCM_R6RS_BYTEVECTOR_INLINEABLE_SIZE_P (len)))
215 /* Copy CONTENTS into an "in-line" buffer, then free CONTENTS. */
216 signed char *c_bv;
218 bv = make_bytevector (len);
219 c_bv = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
220 memcpy (c_bv, contents, len);
221 scm_gc_free (contents, len, SCM_GC_BYTEVECTOR);
223 else
224 bv = make_bytevector_from_buffer (len, contents);
226 return bv;
229 /* Shrink BV to C_NEW_LEN (which is assumed to be smaller than its current
230 size) and return BV. */
232 scm_r6rs_i_shrink_bytevector (SCM bv, unsigned c_new_len)
234 if (!SCM_R6RS_BYTEVECTOR_INLINE_P (bv))
236 unsigned c_len;
237 signed char *c_bv, *c_new_bv;
239 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);
240 c_bv = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
242 SCM_R6RS_BYTEVECTOR_SET_LENGTH (bv, c_new_len);
244 if (SCM_R6RS_BYTEVECTOR_INLINEABLE_SIZE_P (c_new_len))
246 /* Copy to the in-line buffer and free the current buffer. */
247 c_new_bv = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
248 memcpy (c_new_bv, c_bv, c_new_len);
249 scm_gc_free (c_bv, c_len, SCM_GC_BYTEVECTOR);
251 else
253 /* Resize the existing buffer. */
254 c_new_bv = scm_gc_realloc (c_bv, c_len, c_new_len,
255 SCM_GC_BYTEVECTOR);
256 SCM_R6RS_BYTEVECTOR_SET_CONTENTS (bv, c_new_bv);
260 return bv;
263 SCM_SMOB_PRINT (scm_tc16_r6rs_bytevector, print_bytevector,
264 bv, port, pstate)
266 unsigned c_len, i;
267 unsigned char *c_bv;
269 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);
270 c_bv = (unsigned char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
272 scm_puts ("#vu8(", port);
273 for (i = 0; i < c_len; i++)
275 if (i > 0)
276 scm_putc (' ', port);
278 scm_uintprint (c_bv[i], 10, port);
281 scm_putc (')', port);
283 return 1;
286 SCM_SMOB_FREE (scm_tc16_r6rs_bytevector, free_bytevector, bv)
289 if (!SCM_R6RS_BYTEVECTOR_INLINE_P (bv))
291 unsigned c_len;
292 signed char *c_bv;
294 c_bv = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
295 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);
297 scm_gc_free (c_bv, c_len, SCM_GC_BYTEVECTOR);
300 return 0;
305 /* General operations. */
307 SCM_SYMBOL (scm_sym_big, "big");
308 SCM_SYMBOL (scm_sym_little, "little");
310 /* Host endianness (a symbol). */
311 static SCM native_endianness = SCM_UNSPECIFIED;
313 /* Byte-swapping. */
314 #ifndef bswap_24
315 # define bswap_24(_x) \
316 ((((_x) & 0xff0000) >> 16) | \
317 (((_x) & 0x00ff00)) | \
318 (((_x) & 0x0000ff) << 16))
319 #endif
322 SCM_DEFINE (scm_r6rs_native_endianness, "native-endianness", 0, 0, 0,
323 (void),
324 "Return a symbol denoting the machine's native endianness.")
325 #define FUNC_NAME s_scm_r6rs_native_endianness
327 return native_endianness;
329 #undef FUNC_NAME
331 SCM_DEFINE (scm_r6rs_bytevector_p, "bytevector?", 1, 0, 0,
332 (SCM obj),
333 "Return true if @var{obj} is a bytevector.")
334 #define FUNC_NAME s_scm_r6rs_bytevector_p
336 return (scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_r6rs_bytevector,
337 obj)));
339 #undef FUNC_NAME
341 SCM_DEFINE (scm_r6rs_make_bytevector, "make-bytevector", 1, 1, 0,
342 (SCM len, SCM fill),
343 "Return a newly allocated bytevector of @var{len} bytes, "
344 "optionally filled with @var{fill}.")
345 #define FUNC_NAME s_scm_r6rs_make_bytevector
347 SCM bv;
348 unsigned c_len;
349 signed char c_fill = '\0';
351 SCM_VALIDATE_UINT_COPY (1, len, c_len);
352 if (fill != SCM_UNDEFINED)
354 int value;
356 value = scm_to_int (fill);
357 if (EXPECT_FALSE ((value < -128) || (value > 255)))
358 scm_out_of_range (FUNC_NAME, fill);
359 c_fill = (signed char) value;
362 bv = make_bytevector (c_len);
363 if (fill != SCM_UNDEFINED)
365 unsigned i;
366 signed char *contents;
368 contents = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
369 for (i = 0; i < c_len; i++)
370 contents[i] = c_fill;
373 return bv;
375 #undef FUNC_NAME
377 SCM_DEFINE (scm_r6rs_bytevector_length, "bytevector-length", 1, 0, 0,
378 (SCM bv),
379 "Return the length (in bytes) of @var{bv}.")
380 #define FUNC_NAME s_scm_r6rs_bytevector_length
382 SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv);
384 return (scm_from_uint (SCM_R6RS_BYTEVECTOR_LENGTH (bv)));
386 #undef FUNC_NAME
388 SCM_DEFINE (scm_r6rs_bytevector_eq_p, "bytevector=?", 2, 0, 0,
389 (SCM bv1, SCM bv2),
390 "Return is @var{bv1} equals to @var{bv2}---i.e., if they "
391 "have the same length and contents.")
392 #define FUNC_NAME s_scm_r6rs_bytevector_eq_p
394 SCM result = SCM_BOOL_F;
395 unsigned c_len1, c_len2;
397 SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv1);
398 SCM_VALIDATE_R6RS_BYTEVECTOR (2, bv2);
400 c_len1 = SCM_R6RS_BYTEVECTOR_LENGTH (bv1);
401 c_len2 = SCM_R6RS_BYTEVECTOR_LENGTH (bv2);
403 if (c_len1 == c_len2)
405 signed char *c_bv1, *c_bv2;
407 c_bv1 = SCM_R6RS_BYTEVECTOR_CONTENTS (bv1);
408 c_bv2 = SCM_R6RS_BYTEVECTOR_CONTENTS (bv2);
410 result = scm_from_bool (!memcmp (c_bv1, c_bv2, c_len1));
413 return result;
415 #undef FUNC_NAME
417 SCM_DEFINE (scm_r6rs_bytevector_fill_x, "bytevector-fill!", 2, 0, 0,
418 (SCM bv, SCM fill),
419 "Fill bytevector @var{bv} with @var{fill}, a byte.")
420 #define FUNC_NAME s_scm_r6rs_bytevector_fill_x
422 unsigned c_len, i;
423 signed char *c_bv, c_fill;
425 SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv);
426 c_fill = scm_to_int8 (fill);
428 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);
429 c_bv = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
431 for (i = 0; i < c_len; i++)
432 c_bv[i] = c_fill;
434 return SCM_UNSPECIFIED;
436 #undef FUNC_NAME
438 SCM_DEFINE (scm_r6rs_bytevector_copy_x, "bytevector-copy!", 5, 0, 0,
439 (SCM source, SCM source_start, SCM target, SCM target_start,
440 SCM len),
441 "Copy @var{len} bytes from @var{source} into @var{target}, "
442 "starting reading from @var{source_start} (a positive index "
443 "within @var{source}) and start writing at "
444 "@var{target_start}.")
445 #define FUNC_NAME s_scm_r6rs_bytevector_copy_x
447 unsigned c_len, c_source_len, c_target_len;
448 unsigned c_source_start, c_target_start;
449 signed char *c_source, *c_target;
451 SCM_VALIDATE_R6RS_BYTEVECTOR (1, source);
452 SCM_VALIDATE_R6RS_BYTEVECTOR (3, target);
454 c_len = scm_to_uint (len);
455 c_source_start = scm_to_uint (source_start);
456 c_target_start = scm_to_uint (target_start);
458 c_source = SCM_R6RS_BYTEVECTOR_CONTENTS (source);
459 c_target = SCM_R6RS_BYTEVECTOR_CONTENTS (target);
460 c_source_len = SCM_R6RS_BYTEVECTOR_LENGTH (source);
461 c_target_len = SCM_R6RS_BYTEVECTOR_LENGTH (target);
463 if (EXPECT_FALSE (c_source_start + c_len > c_source_len))
464 scm_out_of_range (FUNC_NAME, source_start);
465 if (EXPECT_FALSE (c_target_start + c_len > c_target_len))
466 scm_out_of_range (FUNC_NAME, target_start);
468 memcpy (c_target + c_target_start,
469 c_source + c_source_start,
470 c_len);
472 return SCM_UNSPECIFIED;
474 #undef FUNC_NAME
476 SCM_DEFINE (scm_r6rs_bytevector_copy, "bytevector-copy", 1, 0, 0,
477 (SCM bv),
478 "Return a newly allocated copy of @var{bv}.")
479 #define FUNC_NAME s_scm_r6rs_bytevector_copy
481 SCM copy;
482 unsigned c_len;
483 signed char *c_bv, *c_copy;
485 SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv);
487 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);
488 c_bv = SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
490 copy = make_bytevector (c_len);
491 c_copy = SCM_R6RS_BYTEVECTOR_CONTENTS (copy);
492 memcpy (c_copy, c_bv, c_len);
494 return copy;
496 #undef FUNC_NAME
499 /* Operations on bytes and octets. */
501 SCM_DEFINE (scm_r6rs_bytevector_u8_ref, "bytevector-u8-ref", 2, 0, 0,
502 (SCM bv, SCM index),
503 "Return the octet located at @var{index} in @var{bv}.")
504 #define FUNC_NAME s_scm_r6rs_bytevector_u8_ref
506 INTEGER_NATIVE_REF (8, unsigned);
508 #undef FUNC_NAME
510 SCM_DEFINE (scm_r6rs_bytevector_s8_ref, "bytevector-s8-ref", 2, 0, 0,
511 (SCM bv, SCM index),
512 "Return the byte located at @var{index} in @var{bv}.")
513 #define FUNC_NAME s_scm_r6rs_bytevector_u8_ref
515 INTEGER_NATIVE_REF (8, signed);
517 #undef FUNC_NAME
519 SCM_DEFINE (scm_r6rs_bytevector_u8_set_x, "bytevector-u8-set!", 3, 0, 0,
520 (SCM bv, SCM index, SCM value),
521 "Return the octet located at @var{index} in @var{bv}.")
522 #define FUNC_NAME s_scm_r6rs_bytevector_u8_set_x
524 INTEGER_NATIVE_SET (8, unsigned);
526 #undef FUNC_NAME
528 SCM_DEFINE (scm_r6rs_bytevector_s8_set_x, "bytevector-s8-set!", 3, 0, 0,
529 (SCM bv, SCM index, SCM value),
530 "Return the octet located at @var{index} in @var{bv}.")
531 #define FUNC_NAME s_scm_r6rs_bytevector_u8_set_x
533 INTEGER_NATIVE_SET (8, signed);
535 #undef FUNC_NAME
537 #undef OCTET_ACCESSOR_PROLOGUE
540 SCM_DEFINE (scm_r6rs_bytevector_to_u8_list, "bytevector->u8-list", 1, 0, 0,
541 (SCM bv),
542 "Return a newly allocated list of octets containing the "
543 "contents of @var{bv}.")
544 #define FUNC_NAME s_scm_r6rs_bytevector_to_u8_list
546 SCM lst, pair;
547 unsigned c_len, i;
548 unsigned char *c_bv;
550 SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv);
552 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv);
553 c_bv = (unsigned char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
555 lst = scm_make_list (scm_from_uint (c_len), SCM_UNSPECIFIED);
556 for (i = 0, pair = lst;
557 i < c_len;
558 i++, pair = SCM_CDR (pair))
560 SCM_SETCAR (pair, SCM_I_MAKINUM (c_bv[i]));
563 return lst;
565 #undef FUNC_NAME
567 SCM_DEFINE (scm_r6rs_u8_list_to_bytevector, "u8-list->bytevector", 1, 0, 0,
568 (SCM lst),
569 "Turn @var{lst}, a list of octets, into a bytevector.")
570 #define FUNC_NAME s_scm_r6rs_u8_list_to_bytevector
572 SCM bv, item;
573 unsigned c_len, i;
574 unsigned char *c_bv;
576 SCM_VALIDATE_LIST_COPYLEN (1, lst, c_len);
578 bv = make_bytevector (c_len);
579 c_bv = (unsigned char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv);
581 for (i = 0; i < c_len; lst = SCM_CDR (lst), i++)
583 item = SCM_CAR (lst);
585 if (EXPECT_TRUE (SCM_I_INUMP (item)))
587 long c_item;
589 c_item = SCM_I_INUM (item);
590 if (EXPECT_TRUE ((c_item >= 0) && (c_item < 256)))
591 c_bv[i] = (unsigned char) c_item;
592 else
593 goto type_error;
595 else
596 goto type_error;
599 return bv;
601 type_error:
602 scm_wrong_type_arg (FUNC_NAME, 1, item);
604 return SCM_BOOL_F;
606 #undef FUNC_NAME
608 /* Compute the two's complement of VALUE (a positive integer) on SIZE octets
609 using (2^(SIZE * 8) - VALUE). */
610 static inline void
611 twos_complement (mpz_t value, size_t size)
613 unsigned long bit_count;
615 /* We expect BIT_COUNT to fit in a unsigned long thanks to the range
616 checking on SIZE performed earlier. */
617 bit_count = (unsigned long) size << 3UL;
619 if (EXPECT_TRUE (bit_count < sizeof (unsigned long)))
620 mpz_ui_sub (value, 1UL << bit_count, value);
621 else
623 mpz_t max;
625 mpz_init (max);
626 mpz_ui_pow_ui (max, 2, bit_count);
627 mpz_sub (value, max, value);
628 mpz_clear (max);
632 static inline SCM
633 bytevector_large_ref (const char *c_bv, size_t c_size, int signed_p,
634 SCM endianness)
636 SCM result;
637 mpz_t c_mpz;
638 int c_endianness, negative_p = 0;
640 if (signed_p)
642 if (scm_is_eq (endianness, scm_sym_big))
643 negative_p = c_bv[0] & 0x80;
644 else
645 negative_p = c_bv[c_size - 1] & 0x80;
648 c_endianness = scm_is_eq (endianness, scm_sym_big) ? 1 : -1;
650 mpz_init (c_mpz);
651 mpz_import (c_mpz, 1 /* 1 word */, 1 /* word order doesn't matter */,
652 c_size /* word is C_SIZE-byte long */,
653 c_endianness,
654 0 /* nails */, c_bv);
656 if (signed_p && negative_p)
658 twos_complement (c_mpz, c_size);
659 mpz_neg (c_mpz, c_mpz);
662 result = scm_from_mpz (c_mpz);
663 mpz_clear (c_mpz); /* FIXME: Needed? */
665 return result;
668 static inline int
669 bytevector_large_set (char *c_bv, size_t c_size, int signed_p,
670 SCM value, SCM endianness)
672 mpz_t c_mpz;
673 int c_endianness, c_sign, err = 0;
675 c_endianness = scm_is_eq (endianness, scm_sym_big) ? 1 : -1;
677 mpz_init (c_mpz);
678 scm_to_mpz (value, c_mpz);
680 c_sign = mpz_sgn (c_mpz);
681 if (c_sign < 0)
683 if (EXPECT_TRUE (signed_p))
685 mpz_neg (c_mpz, c_mpz);
686 twos_complement (c_mpz, c_size);
688 else
690 err = -1;
691 goto finish;
695 if (c_sign == 0)
696 /* Zero. */
697 memset (c_bv, 0, c_size);
698 else
700 size_t word_count, value_size;
702 value_size = (mpz_sizeinbase (c_mpz, 2) + (8 * c_size)) / (8 * c_size);
703 if (EXPECT_FALSE (value_size > c_size))
705 err = -2;
706 goto finish;
710 mpz_export (c_bv, &word_count, 1 /* word order doesn't matter */,
711 c_size, c_endianness,
712 0 /* nails */, c_mpz);
713 if (EXPECT_FALSE (word_count != 1))
714 /* Shouldn't happen since we already checked with VALUE_SIZE. */
715 abort ();
718 finish:
719 mpz_clear (c_mpz);
721 return err;
724 #define GENERIC_INTEGER_ACCESSOR_PROLOGUE(_sign) \
725 unsigned c_len, c_index, c_size; \
726 char *c_bv; \
728 SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv); \
729 c_index = scm_to_uint (index); \
730 c_size = scm_to_uint (size); \
732 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv); \
733 c_bv = (char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv); \
735 /* C_SIZE must have its 3 higher bits set to zero so that \
736 multiplying it by 8 yields a number that fits in an \
737 unsigned long. */ \
738 if (EXPECT_FALSE ((c_size == 0) || (c_size >= (ULONG_MAX >> 3L)))) \
739 scm_out_of_range (FUNC_NAME, size); \
740 if (EXPECT_FALSE (c_index + c_size > c_len)) \
741 scm_out_of_range (FUNC_NAME, index);
744 /* Template of an integer reference function. */
745 #define GENERIC_INTEGER_REF(_sign) \
746 SCM result; \
748 if (c_size < 3) \
750 int swap; \
751 _sign int value; \
753 swap = !scm_is_eq (endianness, native_endianness); \
754 switch (c_size) \
756 case 1: \
758 _sign char c_value8; \
759 memcpy (&c_value8, c_bv, 1); \
760 value = c_value8; \
762 break; \
763 case 2: \
765 INT_TYPE (16, _sign) c_value16; \
766 memcpy (&c_value16, c_bv, 2); \
767 if (swap) \
768 value = (INT_TYPE (16, _sign)) bswap_16 (c_value16); \
769 else \
770 value = c_value16; \
772 break; \
773 default: \
774 abort (); \
777 result = SCM_I_MAKINUM ((_sign int) value); \
779 else \
780 result = bytevector_large_ref ((char *) c_bv, \
781 c_size, SIGNEDNESS (_sign), \
782 endianness); \
784 return result;
786 static inline SCM
787 bytevector_signed_ref (const char *c_bv, size_t c_size, SCM endianness)
789 GENERIC_INTEGER_REF (signed);
792 static inline SCM
793 bytevector_unsigned_ref (const char *c_bv, size_t c_size, SCM endianness)
795 GENERIC_INTEGER_REF (unsigned);
799 /* Template of an integer assignment function. */
800 #define GENERIC_INTEGER_SET(_sign) \
801 if (c_size < 3) \
803 _sign int c_value; \
805 if (EXPECT_FALSE (!SCM_I_INUMP (value))) \
806 goto range_error; \
808 c_value = SCM_I_INUM (value); \
809 switch (c_size) \
811 case 1: \
812 if (EXPECT_TRUE (INT_VALID_P (8, _sign) (c_value))) \
814 _sign char c_value8; \
815 c_value8 = (_sign char) c_value; \
816 memcpy (c_bv, &c_value8, 1); \
818 else \
819 goto range_error; \
820 break; \
822 case 2: \
823 if (EXPECT_TRUE (INT_VALID_P (16, _sign) (c_value))) \
825 int swap; \
826 INT_TYPE (16, _sign) c_value16; \
828 swap = !scm_is_eq (endianness, native_endianness); \
830 c_value16 = \
831 swap ? bswap_16 (c_value) : c_value; \
832 memcpy (c_bv, &c_value16, 2); \
834 else \
835 goto range_error; \
836 break; \
838 default: \
839 abort (); \
842 else \
844 int err; \
846 err = bytevector_large_set (c_bv, c_size, \
847 SIGNEDNESS (_sign), \
848 value, endianness); \
849 if (err) \
850 goto range_error; \
853 return; \
855 range_error: \
856 scm_out_of_range (FUNC_NAME, value); \
857 return;
859 static inline void
860 bytevector_signed_set (char *c_bv, size_t c_size,
861 SCM value, SCM endianness,
862 const char *func_name)
863 #define FUNC_NAME func_name
865 GENERIC_INTEGER_SET (signed);
867 #undef FUNC_NAME
869 static inline void
870 bytevector_unsigned_set (char *c_bv, size_t c_size,
871 SCM value, SCM endianness,
872 const char *func_name)
873 #define FUNC_NAME func_name
875 GENERIC_INTEGER_SET (unsigned);
877 #undef FUNC_NAME
879 #undef GENERIC_INTEGER_SET
880 #undef GENERIC_INTEGER_REF
883 SCM_DEFINE (scm_r6rs_bytevector_uint_ref, "bytevector-uint-ref", 4, 0, 0,
884 (SCM bv, SCM index, SCM endianness, SCM size),
885 "Return the @var{size}-octet long unsigned integer at index "
886 "@var{index} in @var{bv}.")
887 #define FUNC_NAME s_scm_r6rs_bytevector_uint_ref
889 GENERIC_INTEGER_ACCESSOR_PROLOGUE (unsigned);
891 return (bytevector_unsigned_ref (&c_bv[c_index], c_size, endianness));
893 #undef FUNC_NAME
895 SCM_DEFINE (scm_r6rs_bytevector_sint_ref, "bytevector-sint-ref", 4, 0, 0,
896 (SCM bv, SCM index, SCM endianness, SCM size),
897 "Return the @var{size}-octet long unsigned integer at index "
898 "@var{index} in @var{bv}.")
899 #define FUNC_NAME s_scm_r6rs_bytevector_sint_ref
901 GENERIC_INTEGER_ACCESSOR_PROLOGUE (signed);
903 return (bytevector_signed_ref (&c_bv[c_index], c_size, endianness));
905 #undef FUNC_NAME
907 SCM_DEFINE (scm_r6rs_bytevector_uint_set_x, "bytevector-uint-set!", 5, 0, 0,
908 (SCM bv, SCM index, SCM value, SCM endianness, SCM size),
909 "Set the @var{size}-octet long unsigned integer at @var{index} "
910 "to @var{value}.")
911 #define FUNC_NAME s_scm_r6rs_bytevector_uint_set_x
913 GENERIC_INTEGER_ACCESSOR_PROLOGUE (unsigned);
915 bytevector_unsigned_set (&c_bv[c_index], c_size, value, endianness,
916 FUNC_NAME);
918 return SCM_UNSPECIFIED;
920 #undef FUNC_NAME
922 SCM_DEFINE (scm_r6rs_bytevector_sint_set_x, "bytevector-sint-set!", 5, 0, 0,
923 (SCM bv, SCM index, SCM value, SCM endianness, SCM size),
924 "Set the @var{size}-octet long signed integer at @var{index} "
925 "to @var{value}.")
926 #define FUNC_NAME s_scm_r6rs_bytevector_sint_set_x
928 GENERIC_INTEGER_ACCESSOR_PROLOGUE (signed);
930 bytevector_signed_set (&c_bv[c_index], c_size, value, endianness,
931 FUNC_NAME);
933 return SCM_UNSPECIFIED;
935 #undef FUNC_NAME
939 /* Operations on integers of arbitrary size. */
941 #define INTEGERS_TO_LIST(_sign) \
942 SCM lst, pair; \
943 size_t i, c_len, c_size; \
945 SCM_VALIDATE_R6RS_BYTEVECTOR (1, bv); \
946 SCM_VALIDATE_SYMBOL (2, endianness); \
947 c_size = scm_to_uint (size); \
949 c_len = SCM_R6RS_BYTEVECTOR_LENGTH (bv); \
950 if (c_len == 0) \
951 lst = SCM_EOL; \
952 else \
954 const char *c_bv; \
956 c_bv = (char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv); \
958 lst = scm_make_list (scm_from_uint (c_len / c_size), \
959 SCM_UNSPECIFIED); \
960 for (i = 0, pair = lst; \
961 i <= c_len - c_size; \
962 i += c_size, c_bv += c_size, pair = SCM_CDR (pair)) \
964 SCM_SETCAR (pair, \
965 bytevector_ ## _sign ## _ref (c_bv, c_size, \
966 endianness)); \
970 return lst;
972 SCM_DEFINE (scm_r6rs_bytevector_to_sint_list, "bytevector->sint-list",
973 3, 0, 0,
974 (SCM bv, SCM endianness, SCM size),
975 "Return a list of signed integers of @var{size} octets "
976 "representing the contents of @var{bv}.")
977 #define FUNC_NAME s_scm_r6rs_bytevector_to_sint_list
979 INTEGERS_TO_LIST (signed);
981 #undef FUNC_NAME
983 SCM_DEFINE (scm_r6rs_bytevector_to_uint_list, "bytevector->uint-list",
984 3, 0, 0,
985 (SCM bv, SCM endianness, SCM size),
986 "Return a list of unsigned integers of @var{size} octets "
987 "representing the contents of @var{bv}.")
988 #define FUNC_NAME s_scm_r6rs_bytevector_to_uint_list
990 INTEGERS_TO_LIST (unsigned);
992 #undef FUNC_NAME
994 #undef INTEGER_TO_LIST
997 #define INTEGER_LIST_TO_BYTEVECTOR(_sign) \
998 SCM bv; \
999 size_t c_len, c_size; \
1000 char *c_bv, *c_bv_ptr; \
1002 SCM_VALIDATE_LIST_COPYLEN (1, lst, c_len); \
1003 SCM_VALIDATE_SYMBOL (2, endianness); \
1004 c_size = scm_to_uint (size); \
1006 if (EXPECT_FALSE ((c_size == 0) || (c_size >= (ULONG_MAX >> 3L)))) \
1007 scm_out_of_range (FUNC_NAME, size); \
1009 bv = make_bytevector (c_len * c_size); \
1010 c_bv = (char *) SCM_R6RS_BYTEVECTOR_CONTENTS (bv); \
1012 for (c_bv_ptr = c_bv; \
1013 !scm_is_null (lst); \
1014 lst = SCM_CDR (lst), c_bv_ptr += c_size) \
1016 bytevector_ ## _sign ## _set (c_bv_ptr, c_size, \
1017 SCM_CAR (lst), endianness, \
1018 FUNC_NAME); \
1021 return bv;
1024 SCM_DEFINE (scm_r6rs_uint_list_to_bytevector, "uint-list->bytevector",
1025 3, 0, 0,
1026 (SCM lst, SCM endianness, SCM size),
1027 "Return a bytevector containing the unsigned integers "
1028 "listed in @var{lst} and encoded on @var{size} octets "
1029 "according to @var{endianness}.")
1030 #define FUNC_NAME s_scm_r6rs_uint_list_to_bytevector
1032 INTEGER_LIST_TO_BYTEVECTOR (unsigned);
1034 #undef FUNC_NAME
1036 SCM_DEFINE (scm_r6rs_sint_list_to_bytevector, "sint-list->bytevector",
1037 3, 0, 0,
1038 (SCM lst, SCM endianness, SCM size),
1039 "Return a bytevector containing the signed integers "
1040 "listed in @var{lst} and encoded on @var{size} octets "
1041 "according to @var{endianness}.")
1042 #define FUNC_NAME s_scm_r6rs_sint_list_to_bytevector
1044 INTEGER_LIST_TO_BYTEVECTOR (signed);
1046 #undef FUNC_NAME
1048 #undef INTEGER_LIST_TO_BYTEVECTOR
1052 /* Operations on 16-bit integers. */
1054 SCM_DEFINE (scm_r6rs_bytevector_u16_ref, "bytevector-u16-ref",
1055 3, 0, 0,
1056 (SCM bv, SCM index, SCM endianness),
1057 "Return the unsigned 16-bit integer from @var{bv} at "
1058 "@var{index}.")
1059 #define FUNC_NAME s_scm_r6rs_bytevector_u16_ref
1061 INTEGER_REF (16, unsigned);
1063 #undef FUNC_NAME
1065 SCM_DEFINE (scm_r6rs_bytevector_s16_ref, "bytevector-s16-ref",
1066 3, 0, 0,
1067 (SCM bv, SCM index, SCM endianness),
1068 "Return the signed 16-bit integer from @var{bv} at "
1069 "@var{index}.")
1070 #define FUNC_NAME s_scm_r6rs_bytevector_s16_ref
1072 INTEGER_REF (16, signed);
1074 #undef FUNC_NAME
1076 SCM_DEFINE (scm_r6rs_bytevector_u16_native_ref, "bytevector-u16-native-ref",
1077 2, 0, 0,
1078 (SCM bv, SCM index),
1079 "Return the unsigned 16-bit integer from @var{bv} at "
1080 "@var{index} using the native endianness.")
1081 #define FUNC_NAME s_scm_r6rs_bytevector_u16_native_ref
1083 INTEGER_NATIVE_REF (16, unsigned);
1085 #undef FUNC_NAME
1087 SCM_DEFINE (scm_r6rs_bytevector_s16_native_ref, "bytevector-s16-native-ref",
1088 2, 0, 0,
1089 (SCM bv, SCM index),
1090 "Return the unsigned 16-bit integer from @var{bv} at "
1091 "@var{index} using the native endianness.")
1092 #define FUNC_NAME s_scm_r6rs_bytevector_u16_native_ref
1094 INTEGER_NATIVE_REF (16, signed);
1096 #undef FUNC_NAME
1098 SCM_DEFINE (scm_r6rs_bytevector_u16_set_x, "bytevector-u16-set!",
1099 4, 0, 0,
1100 (SCM bv, SCM index, SCM value, SCM endianness),
1101 "Store @var{value} in @var{bv} at @var{index} according to "
1102 "@var{endianness}.")
1103 #define FUNC_NAME s_scm_r6rs_bytevector_u16_set_x
1105 INTEGER_SET (16, unsigned);
1107 #undef FUNC_NAME
1109 SCM_DEFINE (scm_r6rs_bytevector_s16_set_x, "bytevector-s16-set!",
1110 4, 0, 0,
1111 (SCM bv, SCM index, SCM value, SCM endianness),
1112 "Store @var{value} in @var{bv} at @var{index} according to "
1113 "@var{endianness}.")
1114 #define FUNC_NAME s_scm_r6rs_bytevector_s16_set_x
1116 INTEGER_SET (16, signed);
1118 #undef FUNC_NAME
1120 SCM_DEFINE (scm_r6rs_bytevector_u16_native_set_x, "bytevector-u16-native-set!",
1121 3, 0, 0,
1122 (SCM bv, SCM index, SCM value),
1123 "Store the unsigned integer @var{value} at index @var{index} "
1124 "of @var{bv} using the native endianness.")
1125 #define FUNC_NAME s_scm_r6rs_bytevector_u16_native_set_x
1127 INTEGER_NATIVE_SET (16, unsigned);
1129 #undef FUNC_NAME
1131 SCM_DEFINE (scm_r6rs_bytevector_s16_native_set_x, "bytevector-s16-native-set!",
1132 3, 0, 0,
1133 (SCM bv, SCM index, SCM value),
1134 "Store the signed integer @var{value} at index @var{index} "
1135 "of @var{bv} using the native endianness.")
1136 #define FUNC_NAME s_scm_r6rs_bytevector_u16_native_set_x
1138 INTEGER_NATIVE_SET (16, signed);
1140 #undef FUNC_NAME
1144 /* Operations on 32-bit integers. */
1146 /* Unfortunately, on 32-bit machines `SCM' is not large enough to hold
1147 arbitrary 32-bit integers. Thus we fall back to using the
1148 `large_{ref,set}' variants on 32-bit machines. */
1150 #define LARGE_INTEGER_REF(_len, _sign) \
1151 INTEGER_ACCESSOR_PROLOGUE(_len, _sign); \
1152 SCM_VALIDATE_SYMBOL (3, endianness); \
1154 return (bytevector_large_ref ((char *) c_bv + c_index, _len / 8, \
1155 SIGNEDNESS (_sign), endianness));
1157 #define LARGE_INTEGER_SET(_len, _sign) \
1158 int err; \
1159 INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
1160 SCM_VALIDATE_SYMBOL (4, endianness); \
1162 err = bytevector_large_set ((char *) c_bv + c_index, _len / 8, \
1163 SIGNEDNESS (_sign), value, endianness); \
1164 if (EXPECT_FALSE (err)) \
1165 scm_out_of_range (FUNC_NAME, value); \
1167 return SCM_UNSPECIFIED;
1169 #define LARGE_INTEGER_NATIVE_REF(_len, _sign) \
1170 INTEGER_ACCESSOR_PROLOGUE(_len, _sign); \
1171 return (bytevector_large_ref ((char *) c_bv + c_index, _len / 8, \
1172 SIGNEDNESS (_sign), native_endianness));
1174 #define LARGE_INTEGER_NATIVE_SET(_len, _sign) \
1175 int err; \
1176 INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
1178 err = bytevector_large_set ((char *) c_bv + c_index, _len / 8, \
1179 SIGNEDNESS (_sign), value, \
1180 native_endianness); \
1181 if (EXPECT_FALSE (err)) \
1182 scm_out_of_range (FUNC_NAME, value); \
1184 return SCM_UNSPECIFIED;
1187 SCM_DEFINE (scm_r6rs_bytevector_u32_ref, "bytevector-u32-ref",
1188 3, 0, 0,
1189 (SCM bv, SCM index, SCM endianness),
1190 "Return the unsigned 32-bit integer from @var{bv} at "
1191 "@var{index}.")
1192 #define FUNC_NAME s_scm_r6rs_bytevector_u32_ref
1194 #if SIZEOF_VOID_P > 4
1195 INTEGER_REF (32, unsigned);
1196 #else
1197 LARGE_INTEGER_REF (32, unsigned);
1198 #endif
1200 #undef FUNC_NAME
1202 SCM_DEFINE (scm_r6rs_bytevector_s32_ref, "bytevector-s32-ref",
1203 3, 0, 0,
1204 (SCM bv, SCM index, SCM endianness),
1205 "Return the signed 32-bit integer from @var{bv} at "
1206 "@var{index}.")
1207 #define FUNC_NAME s_scm_r6rs_bytevector_s32_ref
1209 #if SIZEOF_VOID_P > 4
1210 INTEGER_REF (32, signed);
1211 #else
1212 LARGE_INTEGER_REF (32, signed);
1213 #endif
1215 #undef FUNC_NAME
1217 SCM_DEFINE (scm_r6rs_bytevector_u32_native_ref, "bytevector-u32-native-ref",
1218 2, 0, 0,
1219 (SCM bv, SCM index),
1220 "Return the unsigned 32-bit integer from @var{bv} at "
1221 "@var{index} using the native endianness.")
1222 #define FUNC_NAME s_scm_r6rs_bytevector_u32_native_ref
1224 #if SIZEOF_VOID_P > 4
1225 INTEGER_NATIVE_REF (32, unsigned);
1226 #else
1227 LARGE_INTEGER_NATIVE_REF (32, unsigned);
1228 #endif
1230 #undef FUNC_NAME
1232 SCM_DEFINE (scm_r6rs_bytevector_s32_native_ref, "bytevector-s32-native-ref",
1233 2, 0, 0,
1234 (SCM bv, SCM index),
1235 "Return the unsigned 32-bit integer from @var{bv} at "
1236 "@var{index} using the native endianness.")
1237 #define FUNC_NAME s_scm_r6rs_bytevector_u32_native_ref
1239 #if SIZEOF_VOID_P > 4
1240 INTEGER_NATIVE_REF (32, signed);
1241 #else
1242 LARGE_INTEGER_NATIVE_REF (32, signed);
1243 #endif
1245 #undef FUNC_NAME
1247 SCM_DEFINE (scm_r6rs_bytevector_u32_set_x, "bytevector-u32-set!",
1248 4, 0, 0,
1249 (SCM bv, SCM index, SCM value, SCM endianness),
1250 "Store @var{value} in @var{bv} at @var{index} according to "
1251 "@var{endianness}.")
1252 #define FUNC_NAME s_scm_r6rs_bytevector_u32_set_x
1254 #if SIZEOF_VOID_P > 4
1255 INTEGER_SET (32, unsigned);
1256 #else
1257 LARGE_INTEGER_SET (32, unsigned);
1258 #endif
1260 #undef FUNC_NAME
1262 SCM_DEFINE (scm_r6rs_bytevector_s32_set_x, "bytevector-s32-set!",
1263 4, 0, 0,
1264 (SCM bv, SCM index, SCM value, SCM endianness),
1265 "Store @var{value} in @var{bv} at @var{index} according to "
1266 "@var{endianness}.")
1267 #define FUNC_NAME s_scm_r6rs_bytevector_s32_set_x
1269 #if SIZEOF_VOID_P > 4
1270 INTEGER_SET (32, signed);
1271 #else
1272 LARGE_INTEGER_SET (32, signed);
1273 #endif
1275 #undef FUNC_NAME
1277 SCM_DEFINE (scm_r6rs_bytevector_u32_native_set_x, "bytevector-u32-native-set!",
1278 3, 0, 0,
1279 (SCM bv, SCM index, SCM value),
1280 "Store the unsigned integer @var{value} at index @var{index} "
1281 "of @var{bv} using the native endianness.")
1282 #define FUNC_NAME s_scm_r6rs_bytevector_u32_native_set_x
1284 #if SIZEOF_VOID_P > 4
1285 INTEGER_NATIVE_SET (32, unsigned);
1286 #else
1287 LARGE_INTEGER_NATIVE_SET (32, unsigned);
1288 #endif
1290 #undef FUNC_NAME
1292 SCM_DEFINE (scm_r6rs_bytevector_s32_native_set_x, "bytevector-s32-native-set!",
1293 3, 0, 0,
1294 (SCM bv, SCM index, SCM value),
1295 "Store the signed integer @var{value} at index @var{index} "
1296 "of @var{bv} using the native endianness.")
1297 #define FUNC_NAME s_scm_r6rs_bytevector_u32_native_set_x
1299 #if SIZEOF_VOID_P > 4
1300 INTEGER_NATIVE_SET (32, signed);
1301 #else
1302 LARGE_INTEGER_NATIVE_SET (32, signed);
1303 #endif
1305 #undef FUNC_NAME
1309 /* Operations on 64-bit integers. */
1311 /* For 64-bit integers, we use only the `large_{ref,set}' variant. */
1313 SCM_DEFINE (scm_r6rs_bytevector_u64_ref, "bytevector-u64-ref",
1314 3, 0, 0,
1315 (SCM bv, SCM index, SCM endianness),
1316 "Return the unsigned 64-bit integer from @var{bv} at "
1317 "@var{index}.")
1318 #define FUNC_NAME s_scm_r6rs_bytevector_u64_ref
1320 LARGE_INTEGER_REF (64, unsigned);
1322 #undef FUNC_NAME
1324 SCM_DEFINE (scm_r6rs_bytevector_s64_ref, "bytevector-s64-ref",
1325 3, 0, 0,
1326 (SCM bv, SCM index, SCM endianness),
1327 "Return the signed 64-bit integer from @var{bv} at "
1328 "@var{index}.")
1329 #define FUNC_NAME s_scm_r6rs_bytevector_s64_ref
1331 LARGE_INTEGER_REF (64, signed);
1333 #undef FUNC_NAME
1335 SCM_DEFINE (scm_r6rs_bytevector_u64_native_ref, "bytevector-u64-native-ref",
1336 2, 0, 0,
1337 (SCM bv, SCM index),
1338 "Return the unsigned 64-bit integer from @var{bv} at "
1339 "@var{index} using the native endianness.")
1340 #define FUNC_NAME s_scm_r6rs_bytevector_u64_native_ref
1342 LARGE_INTEGER_NATIVE_REF (64, unsigned);
1344 #undef FUNC_NAME
1346 SCM_DEFINE (scm_r6rs_bytevector_s64_native_ref, "bytevector-s64-native-ref",
1347 2, 0, 0,
1348 (SCM bv, SCM index),
1349 "Return the unsigned 64-bit integer from @var{bv} at "
1350 "@var{index} using the native endianness.")
1351 #define FUNC_NAME s_scm_r6rs_bytevector_u64_native_ref
1353 LARGE_INTEGER_NATIVE_REF (64, signed);
1355 #undef FUNC_NAME
1357 SCM_DEFINE (scm_r6rs_bytevector_u64_set_x, "bytevector-u64-set!",
1358 4, 0, 0,
1359 (SCM bv, SCM index, SCM value, SCM endianness),
1360 "Store @var{value} in @var{bv} at @var{index} according to "
1361 "@var{endianness}.")
1362 #define FUNC_NAME s_scm_r6rs_bytevector_u64_set_x
1364 LARGE_INTEGER_SET (64, unsigned);
1366 #undef FUNC_NAME
1368 SCM_DEFINE (scm_r6rs_bytevector_s64_set_x, "bytevector-s64-set!",
1369 4, 0, 0,
1370 (SCM bv, SCM index, SCM value, SCM endianness),
1371 "Store @var{value} in @var{bv} at @var{index} according to "
1372 "@var{endianness}.")
1373 #define FUNC_NAME s_scm_r6rs_bytevector_s64_set_x
1375 LARGE_INTEGER_SET (64, signed);
1377 #undef FUNC_NAME
1379 SCM_DEFINE (scm_r6rs_bytevector_u64_native_set_x, "bytevector-u64-native-set!",
1380 3, 0, 0,
1381 (SCM bv, SCM index, SCM value),
1382 "Store the unsigned integer @var{value} at index @var{index} "
1383 "of @var{bv} using the native endianness.")
1384 #define FUNC_NAME s_scm_r6rs_bytevector_u64_native_set_x
1386 LARGE_INTEGER_NATIVE_SET (64, unsigned);
1388 #undef FUNC_NAME
1390 SCM_DEFINE (scm_r6rs_bytevector_s64_native_set_x, "bytevector-s64-native-set!",
1391 3, 0, 0,
1392 (SCM bv, SCM index, SCM value),
1393 "Store the signed integer @var{value} at index @var{index} "
1394 "of @var{bv} using the native endianness.")
1395 #define FUNC_NAME s_scm_r6rs_bytevector_u64_native_set_x
1397 LARGE_INTEGER_NATIVE_SET (64, signed);
1399 #undef FUNC_NAME
1403 /* Operations on IEEE-754 numbers. */
1405 /* XXX: There are not only two encodings (big and little endian), as implied
1406 by the API, but rather three (in the case of little endian, there are two
1407 possible word endians, as visible in glibc's <ieee754.h>). When the
1408 endianness is `little', we assume little endian for both the byte order
1409 and the word order. */
1411 /* Convert to/from a floating-point number with different endianness. This
1412 method is probably not the most efficient but it should be portable. */
1414 static inline void
1415 float_to_foreign_endianness (union scm_r6rs_ieee754_float *target,
1416 float source)
1418 union scm_r6rs_ieee754_float src;
1420 src.f = source;
1422 #ifdef WORDS_BIGENDIAN
1423 /* Assuming little endian for both byte and word order. */
1424 target->little_endian.negative = src.big_endian.negative;
1425 target->little_endian.exponent = src.big_endian.exponent;
1426 target->little_endian.mantissa = src.big_endian.mantissa;
1427 #else
1428 target->big_endian.negative = src.little_endian.negative;
1429 target->big_endian.exponent = src.little_endian.exponent;
1430 target->big_endian.mantissa = src.little_endian.mantissa;
1431 #endif
1434 static inline float
1435 float_from_foreign_endianness (const union scm_r6rs_ieee754_float *source)
1437 union scm_r6rs_ieee754_float result;
1439 #ifdef WORDS_BIGENDIAN
1440 /* Assuming little endian for both byte and word order. */
1441 result.big_endian.negative = source->little_endian.negative;
1442 result.big_endian.exponent = source->little_endian.exponent;
1443 result.big_endian.mantissa = source->little_endian.mantissa;
1444 #else
1445 result.little_endian.negative = source->big_endian.negative;
1446 result.little_endian.exponent = source->big_endian.exponent;
1447 result.little_endian.mantissa = source->big_endian.mantissa;
1448 #endif
1450 return (result.f);
1453 static inline void
1454 double_to_foreign_endianness (union scm_r6rs_ieee754_double *target,
1455 double source)
1457 union scm_r6rs_ieee754_double src;
1459 src.d = source;
1461 #ifdef WORDS_BIGENDIAN
1462 /* Assuming little endian for both byte and word order. */
1463 target->little_little_endian.negative = src.big_endian.negative;
1464 target->little_little_endian.exponent = src.big_endian.exponent;
1465 target->little_little_endian.mantissa0 = src.big_endian.mantissa0;
1466 target->little_little_endian.mantissa1 = src.big_endian.mantissa1;
1467 #else
1468 target->big_endian.negative = src.little_little_endian.negative;
1469 target->big_endian.exponent = src.little_little_endian.exponent;
1470 target->big_endian.mantissa0 = src.little_little_endian.mantissa0;
1471 target->big_endian.mantissa1 = src.little_little_endian.mantissa1;
1472 #endif
1475 static inline double
1476 double_from_foreign_endianness (const union scm_r6rs_ieee754_double *source)
1478 union scm_r6rs_ieee754_double result;
1480 #ifdef WORDS_BIGENDIAN
1481 /* Assuming little endian for both byte and word order. */
1482 result.big_endian.negative = source->little_little_endian.negative;
1483 result.big_endian.exponent = source->little_little_endian.exponent;
1484 result.big_endian.mantissa0 = source->little_little_endian.mantissa0;
1485 result.big_endian.mantissa1 = source->little_little_endian.mantissa1;
1486 #else
1487 result.little_little_endian.negative = source->big_endian.negative;
1488 result.little_little_endian.exponent = source->big_endian.exponent;
1489 result.little_little_endian.mantissa0 = source->big_endian.mantissa0;
1490 result.little_little_endian.mantissa1 = source->big_endian.mantissa1;
1491 #endif
1493 return (result.d);
1496 /* Template macros to abstract over doubles and floats.
1497 XXX: Guile can only convert to/from doubles. */
1498 #define IEEE754_UNION(_c_type) union scm_r6rs_ieee754_ ## _c_type
1499 #define IEEE754_TO_SCM(_c_type) scm_from_double
1500 #define IEEE754_FROM_SCM(_c_type) scm_to_double
1501 #define IEEE754_FROM_FOREIGN_ENDIANNESS(_c_type) \
1502 _c_type ## _from_foreign_endianness
1503 #define IEEE754_TO_FOREIGN_ENDIANNESS(_c_type) \
1504 _c_type ## _to_foreign_endianness
1507 /* Templace getters and setters. */
1509 #define IEEE754_ACCESSOR_PROLOGUE(_type) \
1510 INTEGER_ACCESSOR_PROLOGUE (sizeof (_type) << 3UL, signed);
1512 #define IEEE754_REF(_type) \
1513 _type c_result; \
1515 IEEE754_ACCESSOR_PROLOGUE (_type); \
1516 SCM_VALIDATE_SYMBOL (3, endianness); \
1518 if (scm_is_eq (endianness, native_endianness)) \
1519 memcpy (&c_result, &c_bv[c_index], sizeof (c_result)); \
1520 else \
1522 IEEE754_UNION (_type) c_raw; \
1524 memcpy (&c_raw, &c_bv[c_index], sizeof (c_raw)); \
1525 c_result = \
1526 IEEE754_FROM_FOREIGN_ENDIANNESS (_type) (&c_raw); \
1529 return (IEEE754_TO_SCM (_type) (c_result));
1531 #define IEEE754_NATIVE_REF(_type) \
1532 _type c_result; \
1534 IEEE754_ACCESSOR_PROLOGUE (_type); \
1536 memcpy (&c_result, &c_bv[c_index], sizeof (c_result)); \
1537 return (IEEE754_TO_SCM (_type) (c_result));
1539 #define IEEE754_SET(_type) \
1540 _type c_value; \
1542 IEEE754_ACCESSOR_PROLOGUE (_type); \
1543 SCM_VALIDATE_REAL (3, value); \
1544 SCM_VALIDATE_SYMBOL (4, endianness); \
1545 c_value = IEEE754_FROM_SCM (_type) (value); \
1547 if (scm_is_eq (endianness, native_endianness)) \
1548 memcpy (&c_bv[c_index], &c_value, sizeof (c_value)); \
1549 else \
1551 IEEE754_UNION (_type) c_raw; \
1553 IEEE754_TO_FOREIGN_ENDIANNESS (_type) (&c_raw, c_value); \
1554 memcpy (&c_bv[c_index], &c_raw, sizeof (c_raw)); \
1557 return SCM_UNSPECIFIED;
1559 #define IEEE754_NATIVE_SET(_type) \
1560 _type c_value; \
1562 IEEE754_ACCESSOR_PROLOGUE (_type); \
1563 SCM_VALIDATE_REAL (3, value); \
1564 c_value = IEEE754_FROM_SCM (_type) (value); \
1566 memcpy (&c_bv[c_index], &c_value, sizeof (c_value)); \
1567 return SCM_UNSPECIFIED;
1570 /* Single precision. */
1572 SCM_DEFINE (scm_r6rs_bytevector_ieee_single_ref,
1573 "bytevector-ieee-single-ref",
1574 3, 0, 0,
1575 (SCM bv, SCM index, SCM endianness),
1576 "Return the IEEE-754 single from @var{bv} at "
1577 "@var{index}.")
1578 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_single_ref
1580 IEEE754_REF (float);
1582 #undef FUNC_NAME
1584 SCM_DEFINE (scm_r6rs_bytevector_ieee_single_native_ref,
1585 "bytevector-ieee-single-native-ref",
1586 2, 0, 0,
1587 (SCM bv, SCM index),
1588 "Return the IEEE-754 single from @var{bv} at "
1589 "@var{index} using the native endianness.")
1590 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_single_native_ref
1592 IEEE754_NATIVE_REF (float);
1594 #undef FUNC_NAME
1596 SCM_DEFINE (scm_r6rs_bytevector_ieee_single_set_x,
1597 "bytevector-ieee-single-set!",
1598 4, 0, 0,
1599 (SCM bv, SCM index, SCM value, SCM endianness),
1600 "Store real @var{value} in @var{bv} at @var{index} according to "
1601 "@var{endianness}.")
1602 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_single_set_x
1604 IEEE754_SET (float);
1606 #undef FUNC_NAME
1608 SCM_DEFINE (scm_r6rs_bytevector_ieee_single_native_set_x,
1609 "bytevector-ieee-single-native-set!",
1610 3, 0, 0,
1611 (SCM bv, SCM index, SCM value),
1612 "Store the real @var{value} at index @var{index} "
1613 "of @var{bv} using the native endianness.")
1614 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_single_native_set_x
1616 IEEE754_NATIVE_SET (float);
1618 #undef FUNC_NAME
1621 /* Double precision. */
1623 SCM_DEFINE (scm_r6rs_bytevector_ieee_double_ref,
1624 "bytevector-ieee-double-ref",
1625 3, 0, 0,
1626 (SCM bv, SCM index, SCM endianness),
1627 "Return the IEEE-754 double from @var{bv} at "
1628 "@var{index}.")
1629 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_double_ref
1631 IEEE754_REF (double);
1633 #undef FUNC_NAME
1635 SCM_DEFINE (scm_r6rs_bytevector_ieee_double_native_ref,
1636 "bytevector-ieee-double-native-ref",
1637 2, 0, 0,
1638 (SCM bv, SCM index),
1639 "Return the IEEE-754 double from @var{bv} at "
1640 "@var{index} using the native endianness.")
1641 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_double_native_ref
1643 IEEE754_NATIVE_REF (double);
1645 #undef FUNC_NAME
1647 SCM_DEFINE (scm_r6rs_bytevector_ieee_double_set_x,
1648 "bytevector-ieee-double-set!",
1649 4, 0, 0,
1650 (SCM bv, SCM index, SCM value, SCM endianness),
1651 "Store real @var{value} in @var{bv} at @var{index} according to "
1652 "@var{endianness}.")
1653 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_double_set_x
1655 IEEE754_SET (double);
1657 #undef FUNC_NAME
1659 SCM_DEFINE (scm_r6rs_bytevector_ieee_double_native_set_x,
1660 "bytevector-ieee-double-native-set!",
1661 3, 0, 0,
1662 (SCM bv, SCM index, SCM value),
1663 "Store the real @var{value} at index @var{index} "
1664 "of @var{bv} using the native endianness.")
1665 #define FUNC_NAME s_scm_r6rs_bytevector_ieee_double_native_set_x
1667 IEEE754_NATIVE_SET (double);
1669 #undef FUNC_NAME
1672 #undef IEEE754_UNION
1673 #undef IEEE754_TO_SCM
1674 #undef IEEE754_FROM_SCM
1675 #undef IEEE754_FROM_FOREIGN_ENDIANNESS
1676 #undef IEEE754_TO_FOREIGN_ENDIANNESS
1677 #undef IEEE754_REF
1678 #undef IEEE754_NATIVE_REF
1679 #undef IEEE754_SET
1680 #undef IEEE754_NATIVE_SET
1683 /* Operations on strings. */
1686 /* Produce a function that returns the length of a UTF-encoded string. */
1687 #define UTF_STRLEN_FUNCTION(_utf_width) \
1688 static inline size_t \
1689 utf ## _utf_width ## _strlen (const uint ## _utf_width ## _t *str) \
1691 size_t len = 0; \
1692 const uint ## _utf_width ## _t *ptr; \
1693 for (ptr = str; \
1694 *ptr != 0; \
1695 ptr++) \
1697 len++; \
1700 return (len * ((_utf_width) / 8)); \
1703 UTF_STRLEN_FUNCTION (8)
1706 /* Return the length (in bytes) of STR, a UTF-(UTF_WIDTH) encoded string. */
1707 #define UTF_STRLEN(_utf_width, _str) \
1708 utf ## _utf_width ## _strlen (_str)
1710 /* Return the "portable" name of the UTF encoding of size UTF_WIDTH and
1711 ENDIANNESS (Gnulib's `iconv_open' module guarantees the portability of the
1712 encoding name). */
1713 static inline void
1714 utf_encoding_name (char *name, size_t utf_width, SCM endianness)
1716 strcpy (name, "UTF-");
1717 strcat (name, ((utf_width == 8)
1718 ? "8"
1719 : ((utf_width == 16)
1720 ? "16"
1721 : ((utf_width == 32)
1722 ? "32"
1723 : "??"))));
1724 strcat (name,
1725 ((scm_is_eq (endianness, scm_sym_big))
1726 ? "BE"
1727 : ((scm_is_eq (endianness, scm_sym_little))
1728 ? "LE"
1729 : "unknown")));
1732 /* Maximum length of a UTF encoding name. */
1733 #define MAX_UTF_ENCODING_NAME_LEN 16
1735 /* Produce the body of a `string->utf' function. */
1736 #define STRING_TO_UTF(_utf_width) \
1737 SCM utf; \
1738 int err; \
1739 char *c_str; \
1740 char c_utf_name[MAX_UTF_ENCODING_NAME_LEN]; \
1741 char *c_utf = NULL, *c_locale; \
1742 size_t c_strlen, c_raw_strlen, c_utf_len = 0; \
1744 SCM_VALIDATE_STRING (1, str); \
1745 if (endianness == SCM_UNDEFINED) \
1746 endianness = scm_sym_big; \
1747 else \
1748 SCM_VALIDATE_SYMBOL (2, endianness); \
1750 c_strlen = scm_c_string_length (str); \
1751 c_raw_strlen = c_strlen * ((_utf_width) / 8); \
1752 do \
1754 c_str = (char *) alloca (c_raw_strlen + 1); \
1755 c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_strlen); \
1757 while (c_raw_strlen > c_strlen); \
1758 c_str[c_raw_strlen] = '\0'; \
1760 utf_encoding_name (c_utf_name, (_utf_width), endianness); \
1762 c_locale = (char *) alloca (strlen (locale_charset ()) + 1); \
1763 strcpy (c_locale, locale_charset ()); \
1765 err = mem_iconveh (c_str, c_raw_strlen, \
1766 c_locale, c_utf_name, \
1767 iconveh_question_mark, NULL, \
1768 &c_utf, &c_utf_len); \
1769 if (EXPECT_FALSE (err)) \
1770 scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A", \
1771 scm_list_1 (str), err); \
1772 else \
1773 /* C_UTF is null-terminated. */ \
1774 utf = scm_r6rs_c_take_bytevector ((signed char *) c_utf, \
1775 c_utf_len); \
1777 return (utf);
1781 SCM_DEFINE (scm_r6rs_string_to_utf8, "string->utf8",
1782 1, 0, 0,
1783 (SCM str),
1784 "Return a newly allocated bytevector that contains the UTF-8 "
1785 "encoding of @var{str}.")
1786 #define FUNC_NAME s_scm_r6rs_string_to_utf8
1788 SCM utf;
1789 char *c_str;
1790 uint8_t *c_utf;
1791 size_t c_strlen, c_raw_strlen;
1793 SCM_VALIDATE_STRING (1, str);
1795 c_strlen = scm_c_string_length (str);
1796 c_raw_strlen = c_strlen;
1799 c_str = (char *) alloca (c_raw_strlen + 1);
1800 c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_strlen);
1802 while (c_raw_strlen > c_strlen);
1803 c_str[c_raw_strlen] = '\0';
1805 c_utf = u8_strconv_from_locale (c_str);
1806 if (EXPECT_FALSE (c_utf == NULL))
1807 scm_syserror (FUNC_NAME);
1808 else
1809 /* C_UTF is null-terminated. */
1810 utf = scm_r6rs_c_take_bytevector ((signed char *) c_utf,
1811 UTF_STRLEN (8, c_utf));
1813 return (utf);
1815 #undef FUNC_NAME
1817 SCM_DEFINE (scm_r6rs_string_to_utf16, "string->utf16",
1818 1, 1, 0,
1819 (SCM str, SCM endianness),
1820 "Return a newly allocated bytevector that contains the UTF-16 "
1821 "encoding of @var{str}.")
1822 #define FUNC_NAME s_scm_r6rs_string_to_utf16
1824 STRING_TO_UTF (16);
1826 #undef FUNC_NAME
1828 SCM_DEFINE (scm_r6rs_string_to_utf32, "string->utf32",
1829 1, 1, 0,
1830 (SCM str, SCM endianness),
1831 "Return a newly allocated bytevector that contains the UTF-32 "
1832 "encoding of @var{str}.")
1833 #define FUNC_NAME s_scm_r6rs_string_to_utf32
1835 STRING_TO_UTF (32);
1837 #undef FUNC_NAME
1840 /* Produce the body of a function that converts a UTF-encoded bytevector to a
1841 string. */
1842 #define UTF_TO_STRING(_utf_width) \
1843 SCM str = SCM_BOOL_F; \
1844 int err; \
1845 char *c_str = NULL, *c_locale; \
1846 char c_utf_name[MAX_UTF_ENCODING_NAME_LEN]; \
1847 const char *c_utf; \
1848 size_t c_strlen = 0, c_utf_len; \
1850 SCM_VALIDATE_R6RS_BYTEVECTOR (1, utf); \
1851 if (endianness == SCM_UNDEFINED) \
1852 endianness = scm_sym_big; \
1853 else \
1854 SCM_VALIDATE_SYMBOL (2, endianness); \
1856 c_utf_len = SCM_R6RS_BYTEVECTOR_LENGTH (utf); \
1857 c_utf = (char *) SCM_R6RS_BYTEVECTOR_CONTENTS (utf); \
1858 utf_encoding_name (c_utf_name, (_utf_width), endianness); \
1860 c_locale = (char *) alloca (strlen (locale_charset ()) + 1); \
1861 strcpy (c_locale, locale_charset ()); \
1863 err = mem_iconveh (c_utf, c_utf_len, \
1864 c_utf_name, c_locale, \
1865 iconveh_question_mark, NULL, \
1866 &c_str, &c_strlen); \
1867 if (EXPECT_FALSE (err)) \
1868 scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A", \
1869 scm_list_1 (utf), err); \
1870 else \
1871 /* C_STR is null-terminated. */ \
1872 str = scm_take_locale_stringn (c_str, c_strlen); \
1874 return (str);
1877 SCM_DEFINE (scm_r6rs_utf8_to_string, "utf8->string",
1878 1, 0, 0,
1879 (SCM utf),
1880 "Return a newly allocate string that contains from the UTF-8-"
1881 "encoded contents of bytevector @var{utf}.")
1882 #define FUNC_NAME s_scm_r6rs_utf8_to_string
1884 SCM str;
1885 int err;
1886 char *c_str = NULL, *c_locale;
1887 const char *c_utf;
1888 size_t c_utf_len, c_strlen = 0;
1890 SCM_VALIDATE_R6RS_BYTEVECTOR (1, utf);
1892 c_utf_len = SCM_R6RS_BYTEVECTOR_LENGTH (utf);
1894 c_locale = (char *) alloca (strlen (locale_charset ()) + 1);
1895 strcpy (c_locale, locale_charset ());
1897 c_utf = (char *) SCM_R6RS_BYTEVECTOR_CONTENTS (utf);
1898 err = mem_iconveh (c_utf, c_utf_len,
1899 "UTF-8", c_locale,
1900 iconveh_question_mark, NULL,
1901 &c_str, &c_strlen);
1902 if (EXPECT_FALSE (err))
1903 scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A",
1904 scm_list_1 (utf), err);
1905 else
1906 /* C_STR is null-terminated. */
1907 str = scm_take_locale_stringn (c_str, c_strlen);
1909 return (str);
1911 #undef FUNC_NAME
1913 SCM_DEFINE (scm_r6rs_utf16_to_string, "utf16->string",
1914 1, 1, 0,
1915 (SCM utf, SCM endianness),
1916 "Return a newly allocate string that contains from the UTF-17-"
1917 "encoded contents of bytevector @var{utf}.")
1918 #define FUNC_NAME s_scm_r6rs_utf16_to_string
1920 UTF_TO_STRING (16);
1922 #undef FUNC_NAME
1924 SCM_DEFINE (scm_r6rs_utf32_to_string, "utf32->string",
1925 1, 1, 0,
1926 (SCM utf, SCM endianness),
1927 "Return a newly allocate string that contains from the UTF-17-"
1928 "encoded contents of bytevector @var{utf}.")
1929 #define FUNC_NAME s_scm_r6rs_utf32_to_string
1931 UTF_TO_STRING (32);
1933 #undef FUNC_NAME
1937 /* Initialization. */
1939 void
1940 scm_init_r6rs_bytevector (void)
1942 #include "bytevector.x"
1944 #ifdef WORDS_BIGENDIAN
1945 native_endianness = scm_sym_big;
1946 #else
1947 native_endianness = scm_sym_little;
1948 #endif
1950 scm_r6rs_null_bytevector =
1951 scm_gc_protect_object (make_bytevector_from_buffer (0, NULL));
1954 /* arch-tag: e5a30664-90fc-40d7-8036-ee767aea5fb6