doc: Document bytevectors as strings.
[guile-r6rs-libs.git] / doc / api-r6rs.texi
blobc959b3bb598f98d888ce27a20682f1b1d1c6e7dc
1 @cindex R6RS
2 @cindex R6RS libraries
4 This section describes Guile's implementation of some of the standard
5 libraries defined in the ``Revised^6 Report on the Algorithmic
6 Language'' aka. @url{http://www.r6rs.org/, R6RS}.
8 @menu
9 * Bytevectors::                 Interpreting raw bit strings.
10 @end menu
12 @c *********************************************************************
13 @node Bytevectors
14 @section Bytevectors
16 @cindex bytevector
18 A @dfn{bytevector} is a raw bit string.  The @code{(rnrs bytevector)}
19 module provides procedures to manipulate bytevectors and interpret their
20 contents in a number of ways: bytevector contents can be accessed as
21 signed or unsigned integers of various sizes and endianness, as IEEE-754
22 floating point numbers, or as strings.  It is a useful tool to decode
23 binary data.
25 @menu
26 * Bytevector Endianness::       Dealing with byte order.
27 * Bytevector Manipulation::     Creating, copying, manipulating bytevectors.
28 * Bytevectors as Integers::     Interpreting bytes as integers.
29 * Bytevectors and Integer Lists::  Converting to/from an integer list.
30 * Bytevectors as Floats::       Interpreting bytes as real numbers.
31 * Bytevectors as Strings::      Interpreting bytes as Unicode strings.
32 @end menu
34 @node Bytevector Endianness
35 @subsection Endianness
37 @cindex endianness
38 @cindex byte order
39 @cindex word order
41 Some of the following procedures take an @var{endianness} parameter.
42 The @dfn{endianness} is defined as the order of bytes in
43 multi-byte numbers: numbers encoded in @dfn{big endian} have their most
44 significant bytes written first, whereas numbers encoded in @dfn{little
45 endian} have their least significant bytes first@footnote{Big and little
46 endian are the most common ``endiannesses'' but others exist.  For
47 instance, the GNU MP library allows @dfn{word order} to be specified
48 independently of @dfn{byte order} (@pxref{Integer Import and Export,,,
49 gmp, The GNU Multiple Precision Arithmetic Library Manual}).}  Little
50 endian is the native endianness of the IA32 architecture and its
51 derivatives, while big endian is native to SPARC and PowerPC, among
52 others.  The @code{native-endianness} procedure returns the native
53 endianness of the machine it runs on.
55 @deffn {Scheme Procedure} native-endianness
56 @deffnx {C Function} scm_r6rs_native_endianness ()
57 Return a value denoting the native endianness of the host machine.
58 @end deffn
60 @deffn {Scheme Macro} endianness symbol
61 Return an object denoting the endianness specified by @var{symbol}.  If
62 @var{symbol} is neither @code{big} nor @code{little} then a compile-time
63 error is raised.
64 @end deffn
66 @defvr {C Variable} scm_r6rs_endianness_big
67 @defvrx {C Variable} scm_r6rs_endianness_little
68 The objects denoting big (resp. little) endianness.
69 @end defvr
72 @node Bytevector Manipulation
73 @subsection Manipulating Bytevectors
75 Bytevectors can be created, copied, and analyzed with the following
76 procedures.
78 @deffn {Scheme Procedure} make-bytevector len [fill]
79 @deffnx {C Function} scm_r6rs_make_bytevector (len, fill)
80 @deffnx {C Function} scm_r6rs_c_make_bytevector (unsigned len)
81 Return a new bytevector of @var{len} bytes.  Optionally, if @var{fill}
82 is given, fill it with @var{fill}; @var{fill} must be an 8-bit signed
83 integer, i.e., in the range [-128,127].
84 @end deffn
86 @deffn {Scheme Procedure} bytevector? obj
87 @deffnx {C Function} scm_r6rs_bytevector_p (obj)
88 Return true if @var{obj} is a bytevector.
89 @end deffn
91 @deffn {Scheme Procedure} bytevector-length bv
92 @deffnx {C Function} scm_r6rs_bytevector_length (bv)
93 Return the length in bytes of bytevector @var{bv}.
94 @end deffn
96 @deffn {Scheme Procedure} bytevector=? bv1 bv2
97 @deffnx {C Function} scm_r6rs_bytevector_eq_p (bv1, bv2)
98 Return true if @var{bv1} equals @var{bv2}---i.e., if they have the same
99 length and contents.
100 @end deffn
102 @deffn {Scheme Procedure} bytevector-fill! bv fill
103 @deffnx {C Function} scm_r6rs_bytevector_fill_x (bv, fill)
104 Fill bytevector @var{bv} with @var{fill}, a byte.
105 @end deffn
107 @deffn {Scheme Procedure} bytevector-copy! source source-start target target-start len
108 @deffnx {C Function} scm_r6rs_bytevector_copy_x (source, source_start, target, target_start, len)
109 Copy @var{len} bytes from @var{source} into @var{target}, reading
110 from @var{source-start} (a positive index within @var{source}) and
111 writing at @var{target-start}.
112 @end deffn
114 @deffn {Scheme Procedure} bytevector-copy bv
115 @deffnx {C Function} scm_r6rs_bytevector_copy (bv)
116 Return a newly allocated copy of @var{bv}.
117 @end deffn
120 @node Bytevectors as Integers
121 @subsection Interpreting Bytevector Contents as Integers
123 The contents of a bytevector can be interpreted as a sequence of
124 integers of any given size, sign, and endianness.
126 @lisp
127 (let ((bv (make-bytevector 4)))
128   (bytevector-u8-set! bv 0 #x12)
129   (bytevector-u8-set! bv 1 #x34)
130   (bytevector-u8-set! bv 2 #x56)
131   (bytevector-u8-set! bv 3 #x78)
133   (map (lambda (number)
134          (number->string number 16))
135        (list (bytevector-u8-ref bv 0)
136              (bytevector-u16-ref bv 0 (endianness big))
137              (bytevector-u32-ref bv 0 (endianness little)))))
139 @result{} ("12" "1234" "78563412")
140 @end lisp
142 The most generic procedures to interpret bytevector contents as integers
143 are described below.
145 @deffn {Scheme Procedure} bytevector-uint-ref bv index endianness size
146 @deffnx {Scheme Procedure} bytevector-sint-ref bv index endianness size
147 @deffnx {C Function} scm_r6rs_bytevector_uint_ref (bv, index, endianness, size)
148 @deffnx {C Function} scm_r6rs_bytevector_sint_ref (bv, index, endianness, size)
149 Return the @var{size}-byte long unsigned (resp. signed) integer at
150 index @var{index} in @var{bv}, decoded according to @var{endianness}.
151 @end deffn
153 @deffn {Scheme Procedure} bytevector-uint-set! bv index value endianness size
154 @deffnx {Scheme Procedure} bytevector-sint-set! bv index value endianness size
155 @deffnx {C Function} scm_r6rs_bytevector_uint_set_x (bv, index, value, endianness, size)
156 @deffnx {C Function} scm_r6rs_bytevector_sint_set_x (bv, index, value, endianness, size)
157 Set the @var{size}-byte long unsigned (resp. signed) integer at
158 @var{index} to @var{value}, encoded according to @var{endianness}.
159 @end deffn
161 The following procedures are similar to the ones above, but specialized
162 to a given integer size:
164 @deffn {Scheme Procedure} bytevector-u8-ref bv index
165 @deffnx {Scheme Procedure} bytevector-s8-ref bv index
166 @deffnx {Scheme Procedure} bytevector-u16-ref bv index endianness
167 @deffnx {Scheme Procedure} bytevector-s16-ref bv index endianness
168 @deffnx {Scheme Procedure} bytevector-u32-ref bv index endianness
169 @deffnx {Scheme Procedure} bytevector-s32-ref bv index endianness
170 @deffnx {Scheme Procedure} bytevector-u64-ref bv index endianness
171 @deffnx {Scheme Procedure} bytevector-s64-ref bv index endianness
172 @deffnx {C Function} scm_r6rs_bytevector_u8_ref (bv, index)
173 @deffnx {C Function} scm_r6rs_bytevector_s8_ref (bv, index)
174 @deffnx {C Function} scm_r6rs_bytevector_u16_ref (bv, index, endianness)
175 @deffnx {C Function} scm_r6rs_bytevector_s16_ref (bv, index, endianness)
176 @deffnx {C Function} scm_r6rs_bytevector_u32_ref (bv, index, endianness)
177 @deffnx {C Function} scm_r6rs_bytevector_s32_ref (bv, index, endianness)
178 @deffnx {C Function} scm_r6rs_bytevector_u64_ref (bv, index, endianness)
179 @deffnx {C Function} scm_r6rs_bytevector_s64_ref (bv, index, endianness)
180 Return the unsigned (resp. signed) @var{n}-bit integer (where @var{n}
181 is 8, 16, 32 or 64) from @var{bv} at @var{index}, decoded according to
182 @var{endianness}.
183 @end deffn
185 @deffn {Scheme Procedure} bytevector-u8-set! bv index value
186 @deffnx {Scheme Procedure} bytevector-s8-set! bv index value
187 @deffnx {Scheme Procedure} bytevector-u16-set! bv index value endianness
188 @deffnx {Scheme Procedure} bytevector-s16-set! bv index value endianness
189 @deffnx {Scheme Procedure} bytevector-u32-set! bv index value endianness
190 @deffnx {Scheme Procedure} bytevector-s32-set! bv index value endianness
191 @deffnx {Scheme Procedure} bytevector-u64-set! bv index value endianness
192 @deffnx {Scheme Procedure} bytevector-s64-set! bv index value endianness
193 @deffnx {C Function} scm_r6rs_bytevector_u8_set_x (bv, index, value)
194 @deffnx {C Function} scm_r6rs_bytevector_s8_set_x (bv, index, value)
195 @deffnx {C Function} scm_r6rs_bytevector_u16_set_x (bv, index, value, endianness)
196 @deffnx {C Function} scm_r6rs_bytevector_s16_set_x (bv, index, value, endianness)
197 @deffnx {C Function} scm_r6rs_bytevector_u32_set_x (bv, index, value, endianness)
198 @deffnx {C Function} scm_r6rs_bytevector_s32_set_x (bv, index, value, endianness)
199 @deffnx {C Function} scm_r6rs_bytevector_u64_set_x (bv, index, value, endianness)
200 @deffnx {C Function} scm_r6rs_bytevector_s64_set_x (bv, index, value, endianness)
201 Store @var{value} as an unsigned (resp. signed) @var{n}-bit integer
202 (where @var{n} is 8, 16, 32 or 64) in @var{bv} at @var{index}, encoded
203 according to @var{endianness}.
204 @end deffn
206 Finally, a variant specialized for the host's endianness is available
207 for each of these functions (with the exception of the @code{u8}
208 accessors, for obvious reasons):
210 @deffn {Scheme Procedure} bytevector-u16-native-ref bv index
211 @deffnx {Scheme Procedure} bytevector-s16-native-ref bv index
212 @deffnx {Scheme Procedure} bytevector-u32-native-ref bv index
213 @deffnx {Scheme Procedure} bytevector-s32-native-ref bv index
214 @deffnx {Scheme Procedure} bytevector-u64-native-ref bv index
215 @deffnx {Scheme Procedure} bytevector-s64-native-ref bv index
216 @deffnx {C Function} scm_r6rs_bytevector_u16_native_ref (bv, index)
217 @deffnx {C Function} scm_r6rs_bytevector_s16_native_ref (bv, index)
218 @deffnx {C Function} scm_r6rs_bytevector_u32_native_ref (bv, index)
219 @deffnx {C Function} scm_r6rs_bytevector_s32_native_ref (bv, index)
220 @deffnx {C Function} scm_r6rs_bytevector_u64_native_ref (bv, index)
221 @deffnx {C Function} scm_r6rs_bytevector_s64_native_ref (bv, index)
222 Return the unsigned (resp. signed) @var{n}-bit integer (where @var{n}
223 is 16, 32 or 64) from @var{bv} at @var{index}, decoded according to the
224 host's native endianness.
225 @end deffn
227 @deffn {Scheme Procedure} bytevector-u16-native-set! bv index value
228 @deffnx {Scheme Procedure} bytevector-s16-native-set! bv index value
229 @deffnx {Scheme Procedure} bytevector-u32-native-set! bv index value
230 @deffnx {Scheme Procedure} bytevector-s32-native-set! bv index value
231 @deffnx {Scheme Procedure} bytevector-u64-native-set! bv index value
232 @deffnx {Scheme Procedure} bytevector-s64-native-set! bv index value
233 @deffnx {C Function} scm_r6rs_bytevector_u16_native_set_x (bv, index, value)
234 @deffnx {C Function} scm_r6rs_bytevector_s16_native_set_x (bv, index, value)
235 @deffnx {C Function} scm_r6rs_bytevector_u32_native_set_x (bv, index, value)
236 @deffnx {C Function} scm_r6rs_bytevector_s32_native_set_x (bv, index, value)
237 @deffnx {C Function} scm_r6rs_bytevector_u64_native_set_x (bv, index, value)
238 @deffnx {C Function} scm_r6rs_bytevector_s64_native_set_x (bv, index, value)
239 Store @var{value} as an unsigned (resp. signed) @var{n}-bit integer
240 (where @var{n} is 16, 32 or 64) in @var{bv} at @var{index}, encoded
241 according to the host's native endianness.
242 @end deffn
245 @node Bytevectors and Integer Lists
246 @subsection Converting Bytevectors to/from Integer Lists
248 Bytevector contents can readily be converted to/from lists of signed or
249 unsigned integers:
251 @lisp
252 (bytevector->sint-list (u8-list->bytevector (make-list 4 255))
253                        (endianness little) 2)
254 @result{} (-1 -1)
255 @end lisp
257 @deffn {Scheme Procedure} bytevector->u8-list bv
258 @deffnx {C Function} scm_r6rs_bytevector_to_u8_list (bv)
259 Return a newly allocated list of unsigned 8-bit integers from the
260 contents of @var{bv}.
261 @end deffn
263 @deffn {Scheme Procedure} u8-list->bytevector lst
264 @deffnx {C Function} scm_r6rs_u8_list_to_bytevector (lst)
265 Return a newly allocated bytevector consisting of the unsigned 8-bit
266 integers listed in @var{lst}.
267 @end deffn
269 @deffn {Scheme Procedure} bytevector->uint-list bv endianness size
270 @deffnx {Scheme Procedure} bytevector->sint-list bv endianness size
271 @deffnx {C Function} scm_r6rs_bytevector_to_uint_list (bv, endianness, size)
272 @deffnx {C Function} scm_r6rs_bytevector_to_sint_list (bv, endianness, size)
273 Return a list of unsigned (resp. signed) integers of @var{size} bytes
274 representing the contents of @var{bv}, decoded according to
275 @var{endianness}.
276 @end deffn
279 @node Bytevectors as Floats
280 @subsection Interpreting Bytevector Contents as Floating Point Numbers
282 @cindex IEEE-754 floating point numbers
284 Bytevector contents can also be accessed as IEEE-754 single- or
285 double-precision floating point numbers (respectively 32 and 64-bit
286 long) using the procedures described here.
288 @deffn {Scheme Procedure} bytevector-ieee-single-ref bv index endianness
289 @deffnx {Scheme Procedure} bytevector-ieee-double-ref bv index endianness
290 @deffnx {C Function} scm_r6rs_bytevector_ieee_single_ref (bv, index, endianness)
291 @deffnx {C Function} scm_r6rs_bytevector_ieee_double_ref (bv, index, endianness)
292 Return the IEEE-754 single-precision (resp. double-precision) floating
293 point number from @var{bv} at @var{index} according to @var{endianness}.
294 @end deffn
296 @deffn {Scheme Procedure} bytevector-ieee-single-set! bv index value endianness
297 @deffnx {Scheme Procedure} bytevector-ieee-double-set! bv index value endianness
298 @deffnx {C Function} scm_r6rs_bytevector_ieee_single_set_x (bv, index, value, endianness)
299 @deffnx {C Function} scm_r6rs_bytevector_ieee_double_set_x (bv, index, value, endianness)
300 Store real number @var{value} in @var{bv} at @var{index} according to
301 @var{endianness}.
302 @end deffn
304 Specialized procedures are also available:
306 @deffn {Scheme Procedure} bytevector-ieee-single-native-ref bv index
307 @deffnx {Scheme Procedure} bytevector-ieee-double-native-ref bv index
308 @deffnx {C Function} scm_r6rs_bytevector_ieee_single_native_ref (bv, index)
309 @deffnx {C Function} scm_r6rs_bytevector_ieee_double_native_ref (bv, index)
310 Return the IEEE-754 single-precision (resp. double-precision) floating
311 point number from @var{bv} at @var{index} according to the host's native endianness.
312 @end deffn
314 @deffn {Scheme Procedure} bytevector-ieee-single-native-set! bv index value
315 @deffnx {Scheme Procedure} bytevector-ieee-double-native-set! bv index value
316 @deffnx {C Function} scm_r6rs_bytevector_ieee_single_native_set_x (bv, index, value)
317 @deffnx {C Function} scm_r6rs_bytevector_ieee_double_native_set_x (bv, index, value)
318 Store real number @var{value} in @var{bv} at @var{index} according to
319 the host's native endianness.
320 @end deffn
323 @node Bytevectors as Strings
324 @subsection Interpreting Bytevector Contents as Unicode Strings
326 Bytevector contents can also be interpreted as Unicode strings encoded
327 in one of the most commonly available encoding formats@footnote{Guile
328 1.8 does @emph{not} support Unicode strings.  Therefore, the procedures
329 described here assume that Guile strings are internally encoded
330 according to the current locale.  For instance, if @code{$LC_CTYPE} is
331 @code{fr_FR.ISO-8859-1}, then @code{string->utf8} @i{et al.} will
332 assume that Guile strings are Latin-1-encoded.}.
334 @lisp
335 (utf8->string (u8-list->bytevector '(99 97 102 101)))
336 @result{} "cafe"
338 (string->utf8 "caf@'e") ;; SMALL LATIN LETTER E WITH ACUTE ACCENT
339 @result{} #vu8(99 97 102 195 169)
340 @end lisp
342 @deffn {Scheme Procedure} string->utf8 str
343 @deffnx {Scheme Procedure} string->utf16 str
344 @deffnx {Scheme Procedure} string->utf32 str
345 @deffnx {C Function} scm_r6rs_string_to_utf8 (str)
346 @deffnx {C Function} scm_r6rs_string_to_utf16 (str)
347 @deffnx {C Function} scm_r6rs_string_to_utf32 (str)
348 Return a newly allocated bytevector that contains the UTF-8, UTF-16, or
349 UTF-32 (aka. UCS-4) encoding of @var{str}.
350 @end deffn
352 @deffn {Scheme Procedure} utf8->string utf
353 @deffnx {Scheme Procedure} utf16->string utf
354 @deffnx {Scheme Procedure} utf32->string utf
355 @deffnx {C Function} scm_r6rs_utf8_to_string (utf)
356 @deffnx {C Function} scm_r6rs_utf16_to_string (utf)
357 @deffnx {C Function} scm_r6rs_utf32_to_string (utf)
358 Return a newly allocated string that contains the UTF-8-, UTF-16-,
359 or UTF-32-decoded contents of bytevector @var{utf}.
360 @end deffn
363 FIXME: Finish.
365 @c LocalWords:  Bytevectors bytevector bytevectors endianness  endian accessors
366 @c LocalWords:  endiannesses
368 @c Local Variables:
369 @c ispell-local-dictionary: "american"
370 @c End: