2 Copyright (C) 2001-2010, Parrot Foundation.
7 src/io/utf8.c - UTF8 I/O utility functions
11 Convert output to utf8. Convert input to Parrot's internal string
14 =head2 Utility functions
22 #include "parrot/parrot.h"
23 #include "io_private.h"
24 #include "../string/unicode.h"
26 /* HEADERIZER HFILE: include/parrot/io.h */
28 /* HEADERIZER BEGIN: static */
29 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
31 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
32 /* HEADERIZER END: static */
36 =item C<size_t Parrot_io_read_utf8(PARROT_INTERP, PMC *filehandle, STRING
39 Read a string from a filehandle in UTF-8 format and convert it to a Parrot
47 Parrot_io_read_utf8(PARROT_INTERP
, ARGMOD(PMC
*filehandle
),
50 ASSERT_ARGS(Parrot_io_read_utf8
)
54 size_t len
= Parrot_io_read_buffer(interp
, filehandle
, buf
);
56 s
->encoding
= Parrot_utf8_encoding_ptr
;
58 /* count chars, verify utf8 */
59 STRING_ITER_INIT(interp
, &iter
);
61 while (iter
.bytepos
< s
->bufused
) {
62 if (iter
.bytepos
+ 4 > s
->bufused
) {
63 const utf8_t
*u8ptr
= (utf8_t
*)((char *)s
->strstart
+
65 const UINTVAL c
= *u8ptr
;
67 if (UTF8_IS_START(c
)) {
68 UINTVAL len2
= UTF8SKIP(u8ptr
);
71 if (iter
.bytepos
+ len2
<= s
->bufused
)
74 /* need len - 1 more chars */
76 s2
= Parrot_str_new_init(interp
, NULL
, len2
,
77 Parrot_utf8_encoding_ptr
, 0);
80 read
= Parrot_io_read_buffer(interp
, filehandle
, &s2
);
83 s
->strlen
= iter
.charpos
;
84 s
= Parrot_str_concat(interp
, s
, s2
);
92 Parrot_utf8_encoding_ptr
->iter_get_and_advance(interp
, *buf
, &iter
);
94 s
->strlen
= iter
.charpos
;
100 =item C<size_t Parrot_io_write_utf8(PARROT_INTERP, PMC *filehandle, const STRING
103 Write a Parrot string to a filehandle in UTF-8 format.
110 Parrot_io_write_utf8(PARROT_INTERP
, ARGMOD(PMC
*filehandle
), ARGIN(const STRING
*s
))
112 ASSERT_ARGS(Parrot_io_write_utf8
)
115 if (s
->encoding
== Parrot_utf8_encoding_ptr
)
116 return Parrot_io_write_buffer(interp
, filehandle
, s
);
118 dest
= Parrot_utf8_encoding_ptr
->to_encoding(interp
, s
);
119 return Parrot_io_write_buffer(interp
, filehandle
, dest
);
128 F<src/io/io_passdown.c>,
130 F<src/io/io_layers.c>,
131 F<src/io/io_private.h>.
140 * c-file-style: "parrot"
142 * vim: expandtab shiftwidth=4: