doc: Document low-level C macros.
[guile-r6rs-libs.git] / doc / api-r6rs.texi
blob59123f52ca34495d74a700dee4be40856ec7ae0a
1 @cindex R6RS
2 @cindex R6RS libraries
4 This section describes Guile's implementation of some of the standard
5 libraries defined in the ``Revised^6 Report on the Algorithmic
6 Language'' aka. @url{http://www.r6rs.org/, R6RS}.
8 @menu
9 * Bytevectors::                 Interpreting raw bit strings.
10 @end menu
12 @c *********************************************************************
13 @node Bytevectors
14 @section Bytevectors
16 @cindex bytevector
18 A @dfn{bytevector} is a raw bit string.  The @code{(rnrs bytevector)}
19 module provides procedures to manipulate bytevectors and interpret their
20 contents in a number of ways: bytevector contents can be accessed as
21 signed or unsigned integers of various sizes and endianness, as IEEE-754
22 floating point numbers, or as strings.  It is a useful tool to decode
23 binary data.
25 @menu
26 * Bytevector Endianness::       Dealing with byte order.
27 * Bytevector Manipulation::     Creating, copying, manipulating bytevectors.
28 * Bytevectors as Integers::     Interpreting bytes as integers.
29 * Bytevectors and Integer Lists::  Converting to/from an integer list.
30 * Bytevectors as Floats::       Interpreting bytes as real numbers.
31 * Bytevectors as Strings::      Interpreting bytes as Unicode strings.
32 @end menu
34 @node Bytevector Endianness
35 @subsection Endianness
37 @cindex endianness
38 @cindex byte order
39 @cindex word order
41 Some of the following procedures take an @var{endianness} parameter.
42 The @dfn{endianness} is defined as the order of bytes in
43 multi-byte numbers: numbers encoded in @dfn{big endian} have their most
44 significant bytes written first, whereas numbers encoded in @dfn{little
45 endian} have their least significant bytes first@footnote{Big and little
46 endian are the most common ``endiannesses'' but others exist.  For
47 instance, the GNU MP library allows @dfn{word order} to be specified
48 independently of @dfn{byte order} (@pxref{Integer Import and Export,,,
49 gmp, The GNU Multiple Precision Arithmetic Library Manual}).}  Little
50 endian is the native endianness of the IA32 architecture and its
51 derivatives, while big endian is native to SPARC and PowerPC, among
52 others.  The @code{native-endianness} procedure returns the native
53 endianness of the machine it runs on.
55 @deffn {Scheme Procedure} native-endianness
56 @deffnx {C Function} scm_r6rs_native_endianness ()
57 Return a value denoting the native endianness of the host machine.
58 @end deffn
60 @deffn {Scheme Macro} endianness symbol
61 Return an object denoting the endianness specified by @var{symbol}.  If
62 @var{symbol} is neither @code{big} nor @code{little} then a compile-time
63 error is raised.
64 @end deffn
66 @defvr {C Variable} scm_r6rs_endianness_big
67 @defvrx {C Variable} scm_r6rs_endianness_little
68 The objects denoting big (resp. little) endianness.
69 @end defvr
72 @node Bytevector Manipulation
73 @subsection Manipulating Bytevectors
75 Bytevectors can be created, copied, and analyzed with the following
76 procedures.
78 @deffn {Scheme Procedure} make-bytevector len [fill]
79 @deffnx {C Function} scm_r6rs_make_bytevector (len, fill)
80 @deffnx {C Function} scm_r6rs_c_make_bytevector (unsigned len)
81 Return a new bytevector of @var{len} bytes.  Optionally, if @var{fill}
82 is given, fill it with @var{fill}; @var{fill} must be an 8-bit signed
83 integer, i.e., in the range [-128,127].
84 @end deffn
86 @deffn {Scheme Procedure} bytevector? obj
87 @deffnx {C Function} scm_r6rs_bytevector_p (obj)
88 Return true if @var{obj} is a bytevector.
89 @end deffn
91 @deffn {Scheme Procedure} bytevector-length bv
92 @deffnx {C Function} scm_r6rs_bytevector_length (bv)
93 Return the length in bytes of bytevector @var{bv}.
94 @end deffn
96 @deffn {Scheme Procedure} bytevector=? bv1 bv2
97 @deffnx {C Function} scm_r6rs_bytevector_eq_p (bv1, bv2)
98 Return true if @var{bv1} equals @var{bv2}---i.e., if they have the same
99 length and contents.
100 @end deffn
102 @deffn {Scheme Procedure} bytevector-fill! bv fill
103 @deffnx {C Function} scm_r6rs_bytevector_fill_x (bv, fill)
104 Fill bytevector @var{bv} with @var{fill}, a byte.
105 @end deffn
107 @deffn {Scheme Procedure} bytevector-copy! source source-start target target-start len
108 @deffnx {C Function} scm_r6rs_bytevector_copy_x (source, source_start, target, target_start, len)
109 Copy @var{len} bytes from @var{source} into @var{target}, reading
110 from @var{source-start} (a non-negative index within @var{source}) and
111 writing starting at @var{target-start}.
112 @end deffn
114 @deffn {Scheme Procedure} bytevector-copy bv
115 @deffnx {C Function} scm_r6rs_bytevector_copy (bv)
116 Return a newly allocated copy of @var{bv}.
117 @end deffn
119 Low-level C macros are available.  They do not perform any
120 type-checking; as such they should be used with care.
122 @deftypefn {C Macro} size_t SCM_R6RS_BYTEVECTOR_LENGTH (bv)
123 Return the length in bytes of bytevector @var{bv}.
124 @end deftypefn
126 @deftypefn {C Macro} {signed char *} SCM_R6RS_BYTEVECTOR_CONTENTS (bv)
127 Return a pointer to the contents of bytevector @var{bv}.
128 @end deftypefn
131 @node Bytevectors as Integers
132 @subsection Interpreting Bytevector Contents as Integers
134 The contents of a bytevector can be interpreted as a sequence of
135 integers of any given size, sign, and endianness.
137 @lisp
138 (let ((bv (make-bytevector 4)))
139   (bytevector-u8-set! bv 0 #x12)
140   (bytevector-u8-set! bv 1 #x34)
141   (bytevector-u8-set! bv 2 #x56)
142   (bytevector-u8-set! bv 3 #x78)
144   (map (lambda (number)
145          (number->string number 16))
146        (list (bytevector-u8-ref bv 0)
147              (bytevector-u16-ref bv 0 (endianness big))
148              (bytevector-u32-ref bv 0 (endianness little)))))
150 @result{} ("12" "1234" "78563412")
151 @end lisp
153 The most generic procedures to interpret bytevector contents as integers
154 are described below.
156 @deffn {Scheme Procedure} bytevector-uint-ref bv index endianness size
157 @deffnx {Scheme Procedure} bytevector-sint-ref bv index endianness size
158 @deffnx {C Function} scm_r6rs_bytevector_uint_ref (bv, index, endianness, size)
159 @deffnx {C Function} scm_r6rs_bytevector_sint_ref (bv, index, endianness, size)
160 Return the @var{size}-byte long unsigned (resp. signed) integer at
161 index @var{index} in @var{bv}, decoded according to @var{endianness}.
162 @end deffn
164 @deffn {Scheme Procedure} bytevector-uint-set! bv index value endianness size
165 @deffnx {Scheme Procedure} bytevector-sint-set! bv index value endianness size
166 @deffnx {C Function} scm_r6rs_bytevector_uint_set_x (bv, index, value, endianness, size)
167 @deffnx {C Function} scm_r6rs_bytevector_sint_set_x (bv, index, value, endianness, size)
168 Set the @var{size}-byte long unsigned (resp. signed) integer at
169 @var{index} in @var{bv} to @var{value}, encoded according to @var{endianness}.
170 @end deffn
172 The following procedures are similar to the ones above, but specialized
173 to a given integer size:
175 @deffn {Scheme Procedure} bytevector-u8-ref bv index
176 @deffnx {Scheme Procedure} bytevector-s8-ref bv index
177 @deffnx {Scheme Procedure} bytevector-u16-ref bv index endianness
178 @deffnx {Scheme Procedure} bytevector-s16-ref bv index endianness
179 @deffnx {Scheme Procedure} bytevector-u32-ref bv index endianness
180 @deffnx {Scheme Procedure} bytevector-s32-ref bv index endianness
181 @deffnx {Scheme Procedure} bytevector-u64-ref bv index endianness
182 @deffnx {Scheme Procedure} bytevector-s64-ref bv index endianness
183 @deffnx {C Function} scm_r6rs_bytevector_u8_ref (bv, index)
184 @deffnx {C Function} scm_r6rs_bytevector_s8_ref (bv, index)
185 @deffnx {C Function} scm_r6rs_bytevector_u16_ref (bv, index, endianness)
186 @deffnx {C Function} scm_r6rs_bytevector_s16_ref (bv, index, endianness)
187 @deffnx {C Function} scm_r6rs_bytevector_u32_ref (bv, index, endianness)
188 @deffnx {C Function} scm_r6rs_bytevector_s32_ref (bv, index, endianness)
189 @deffnx {C Function} scm_r6rs_bytevector_u64_ref (bv, index, endianness)
190 @deffnx {C Function} scm_r6rs_bytevector_s64_ref (bv, index, endianness)
191 Return the unsigned (resp. signed) @var{n}-bit integer (where @var{n}
192 is 8, 16, 32 or 64) from @var{bv} at @var{index}, decoded according to
193 @var{endianness}.
194 @end deffn
196 @deffn {Scheme Procedure} bytevector-u8-set! bv index value
197 @deffnx {Scheme Procedure} bytevector-s8-set! bv index value
198 @deffnx {Scheme Procedure} bytevector-u16-set! bv index value endianness
199 @deffnx {Scheme Procedure} bytevector-s16-set! bv index value endianness
200 @deffnx {Scheme Procedure} bytevector-u32-set! bv index value endianness
201 @deffnx {Scheme Procedure} bytevector-s32-set! bv index value endianness
202 @deffnx {Scheme Procedure} bytevector-u64-set! bv index value endianness
203 @deffnx {Scheme Procedure} bytevector-s64-set! bv index value endianness
204 @deffnx {C Function} scm_r6rs_bytevector_u8_set_x (bv, index, value)
205 @deffnx {C Function} scm_r6rs_bytevector_s8_set_x (bv, index, value)
206 @deffnx {C Function} scm_r6rs_bytevector_u16_set_x (bv, index, value, endianness)
207 @deffnx {C Function} scm_r6rs_bytevector_s16_set_x (bv, index, value, endianness)
208 @deffnx {C Function} scm_r6rs_bytevector_u32_set_x (bv, index, value, endianness)
209 @deffnx {C Function} scm_r6rs_bytevector_s32_set_x (bv, index, value, endianness)
210 @deffnx {C Function} scm_r6rs_bytevector_u64_set_x (bv, index, value, endianness)
211 @deffnx {C Function} scm_r6rs_bytevector_s64_set_x (bv, index, value, endianness)
212 Store @var{value} as an unsigned (resp. signed) @var{n}-bit integer
213 (where @var{n} is 8, 16, 32 or 64) in @var{bv} at @var{index}, encoded
214 according to @var{endianness}.
215 @end deffn
217 Finally, a variant specialized for the host's endianness is available
218 for each of these functions (with the exception of the @code{u8} and
219 @code{s8} accessors, since a single byte has no byte order):
221 @deffn {Scheme Procedure} bytevector-u16-native-ref bv index
222 @deffnx {Scheme Procedure} bytevector-s16-native-ref bv index
223 @deffnx {Scheme Procedure} bytevector-u32-native-ref bv index
224 @deffnx {Scheme Procedure} bytevector-s32-native-ref bv index
225 @deffnx {Scheme Procedure} bytevector-u64-native-ref bv index
226 @deffnx {Scheme Procedure} bytevector-s64-native-ref bv index
227 @deffnx {C Function} scm_r6rs_bytevector_u16_native_ref (bv, index)
228 @deffnx {C Function} scm_r6rs_bytevector_s16_native_ref (bv, index)
229 @deffnx {C Function} scm_r6rs_bytevector_u32_native_ref (bv, index)
230 @deffnx {C Function} scm_r6rs_bytevector_s32_native_ref (bv, index)
231 @deffnx {C Function} scm_r6rs_bytevector_u64_native_ref (bv, index)
232 @deffnx {C Function} scm_r6rs_bytevector_s64_native_ref (bv, index)
233 Return the unsigned (resp. signed) @var{n}-bit integer (where @var{n}
234 is 16, 32 or 64) from @var{bv} at @var{index}, decoded according to the
235 host's native endianness.
236 @end deffn
238 @deffn {Scheme Procedure} bytevector-u16-native-set! bv index value
239 @deffnx {Scheme Procedure} bytevector-s16-native-set! bv index value
240 @deffnx {Scheme Procedure} bytevector-u32-native-set! bv index value
241 @deffnx {Scheme Procedure} bytevector-s32-native-set! bv index value
242 @deffnx {Scheme Procedure} bytevector-u64-native-set! bv index value
243 @deffnx {Scheme Procedure} bytevector-s64-native-set! bv index value
244 @deffnx {C Function} scm_r6rs_bytevector_u16_native_set_x (bv, index, value)
245 @deffnx {C Function} scm_r6rs_bytevector_s16_native_set_x (bv, index, value)
246 @deffnx {C Function} scm_r6rs_bytevector_u32_native_set_x (bv, index, value)
247 @deffnx {C Function} scm_r6rs_bytevector_s32_native_set_x (bv, index, value)
248 @deffnx {C Function} scm_r6rs_bytevector_u64_native_set_x (bv, index, value)
249 @deffnx {C Function} scm_r6rs_bytevector_s64_native_set_x (bv, index, value)
250 Store @var{value} as an unsigned (resp. signed) @var{n}-bit integer
251 (where @var{n} is 16, 32 or 64) in @var{bv} at @var{index}, encoded
252 according to the host's native endianness.
253 @end deffn
256 @node Bytevectors and Integer Lists
257 @subsection Converting Bytevectors to/from Integer Lists
259 Bytevector contents can readily be converted to/from lists of signed or
260 unsigned integers:
262 @lisp
263 (bytevector->sint-list (u8-list->bytevector (make-list 4 255))
264                        (endianness little) 2)
265 @result{} (-1 -1)
266 @end lisp
268 @deffn {Scheme Procedure} bytevector->u8-list bv
269 @deffnx {C Function} scm_r6rs_bytevector_to_u8_list (bv)
270 Return a newly allocated list of unsigned 8-bit integers from the
271 contents of @var{bv}.
272 @end deffn
274 @deffn {Scheme Procedure} u8-list->bytevector lst
275 @deffnx {C Function} scm_r6rs_u8_list_to_bytevector (lst)
276 Return a newly allocated bytevector consisting of the unsigned 8-bit
277 integers listed in @var{lst}.
278 @end deffn
280 @deffn {Scheme Procedure} bytevector->uint-list bv endianness size
281 @deffnx {Scheme Procedure} bytevector->sint-list bv endianness size
282 @deffnx {C Function} scm_r6rs_bytevector_to_uint_list (bv, endianness, size)
283 @deffnx {C Function} scm_r6rs_bytevector_to_sint_list (bv, endianness, size)
284 Return a list of unsigned (resp. signed) integers of @var{size} bytes
285 representing the contents of @var{bv}, decoded according to
286 @var{endianness}.
287 @end deffn
290 @node Bytevectors as Floats
291 @subsection Interpreting Bytevector Contents as Floating Point Numbers
293 @cindex IEEE-754 floating point numbers
295 Bytevector contents can also be accessed as IEEE-754 single- or
296 double-precision floating point numbers (respectively 32 and 64-bit
297 long) using the procedures described here.
299 @deffn {Scheme Procedure} bytevector-ieee-single-ref bv index endianness
300 @deffnx {Scheme Procedure} bytevector-ieee-double-ref bv index endianness
301 @deffnx {C Function} scm_r6rs_bytevector_ieee_single_ref (bv, index, endianness)
302 @deffnx {C Function} scm_r6rs_bytevector_ieee_double_ref (bv, index, endianness)
303 Return the IEEE-754 single-precision (resp. double-precision) floating
304 point number from @var{bv} at @var{index} according to @var{endianness}.
305 @end deffn
307 @deffn {Scheme Procedure} bytevector-ieee-single-set! bv index value endianness
308 @deffnx {Scheme Procedure} bytevector-ieee-double-set! bv index value endianness
309 @deffnx {C Function} scm_r6rs_bytevector_ieee_single_set_x (bv, index, value, endianness)
310 @deffnx {C Function} scm_r6rs_bytevector_ieee_double_set_x (bv, index, value, endianness)
311 Store real number @var{value} in @var{bv} at @var{index} according to
312 @var{endianness}.
313 @end deffn
315 Specialized procedures are also available:
317 @deffn {Scheme Procedure} bytevector-ieee-single-native-ref bv index
318 @deffnx {Scheme Procedure} bytevector-ieee-double-native-ref bv index
319 @deffnx {C Function} scm_r6rs_bytevector_ieee_single_native_ref (bv, index)
320 @deffnx {C Function} scm_r6rs_bytevector_ieee_double_native_ref (bv, index)
321 Return the IEEE-754 single-precision (resp. double-precision) floating
322 point number from @var{bv} at @var{index} according to the host's native endianness.
323 @end deffn
325 @deffn {Scheme Procedure} bytevector-ieee-single-native-set! bv index value
326 @deffnx {Scheme Procedure} bytevector-ieee-double-native-set! bv index value
327 @deffnx {C Function} scm_r6rs_bytevector_ieee_single_native_set_x (bv, index, value)
328 @deffnx {C Function} scm_r6rs_bytevector_ieee_double_native_set_x (bv, index, value)
329 Store real number @var{value} in @var{bv} at @var{index} according to
330 the host's native endianness.
331 @end deffn
334 @node Bytevectors as Strings
335 @subsection Interpreting Bytevector Contents as Unicode Strings
337 Bytevector contents can also be interpreted as Unicode strings encoded
338 in one of the most commonly available encoding formats@footnote{Guile
339 1.8 does @emph{not} support Unicode strings.  Therefore, the procedures
340 described here assume that Guile strings are internally encoded
341 according to the current locale.  For instance, if @code{$LC_CTYPE} is
342 @code{fr_FR.ISO-8859-1}, then @code{string->utf8} @i{et al.} will
343 assume that Guile strings are Latin-1-encoded.}.
345 @lisp
346 (utf8->string (u8-list->bytevector '(99 97 102 101)))
347 @result{} "cafe"
349 (string->utf8 "caf@'e") ;; LATIN SMALL LETTER E WITH ACUTE
350 @result{} #vu8(99 97 102 195 169)
351 @end lisp
353 @deffn {Scheme Procedure} string->utf8 str
354 @deffnx {Scheme Procedure} string->utf16 str
355 @deffnx {Scheme Procedure} string->utf32 str
356 @deffnx {C Function} scm_r6rs_string_to_utf8 (str)
357 @deffnx {C Function} scm_r6rs_string_to_utf16 (str)
358 @deffnx {C Function} scm_r6rs_string_to_utf32 (str)
359 Return a newly allocated bytevector that contains the UTF-8, UTF-16, or
360 UTF-32 (aka. UCS-4) encoding of @var{str}.
361 @end deffn
363 @deffn {Scheme Procedure} utf8->string utf
364 @deffnx {Scheme Procedure} utf16->string utf
365 @deffnx {Scheme Procedure} utf32->string utf
366 @deffnx {C Function} scm_r6rs_utf8_to_string (utf)
367 @deffnx {C Function} scm_r6rs_utf16_to_string (utf)
368 @deffnx {C Function} scm_r6rs_utf32_to_string (utf)
369 Return a newly allocated string that contains the UTF-8-, UTF-16-,
370 or UTF-32-decoded contents of bytevector @var{utf}.
371 @end deffn
374 FIXME: Finish.
376 @c LocalWords:  Bytevectors bytevector bytevectors endianness  endian accessors
377 @c LocalWords:  endiannesses
379 @c Local Variables:
380 @c ispell-local-dictionary: "american"
381 @c End: