fix codetest failure - ASSERT_ARGS does not have a ; after and
[parrot.git] / src / io / utf8.c
blob8000962f34b5ecefea3c03795b3ca27082054bcf
1 /*
2 Copyright (C) 2001-2010, Parrot Foundation.
3 $Id$
5 =head1 NAME
7 src/io/utf8.c - UTF8 I/O utility functions
9 =head1 DESCRIPTION
11 Convert output to UTF-8. Convert UTF-8 input to Parrot's internal string
12 representation.
14 =head2 Utility functions
16 =over 4
18 =cut
22 #include "parrot/parrot.h"
23 #include "io_private.h"
24 #include "../string/unicode.h"
26 /* HEADERIZER HFILE: include/parrot/io.h */
28 /* HEADERIZER BEGIN: static */
29 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
31 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
32 /* HEADERIZER END: static */
36 =item C<size_t Parrot_io_read_utf8(PARROT_INTERP, PMC *filehandle, STRING
37 **buf)>
39 Read a string from a filehandle in UTF-8 format and convert it to a Parrot
40 string.
42 =cut
46 size_t
47 Parrot_io_read_utf8(PARROT_INTERP, ARGMOD(PMC *filehandle),
48 ARGMOD(STRING **buf))
50 ASSERT_ARGS(Parrot_io_read_utf8)
51 STRING *s, *s2;
52 String_iter iter;
54 size_t len = Parrot_io_read_buffer(interp, filehandle, buf);
55 s = *buf;
56 s->encoding = Parrot_utf8_encoding_ptr;
58 /* count chars, verify utf8 */
59 STRING_ITER_INIT(interp, &iter);
61 while (iter.bytepos < s->bufused) {
62 if (iter.bytepos + 4 > s->bufused) {
63 const utf8_t *u8ptr = (utf8_t *)((char *)s->strstart +
64 iter.bytepos);
65 const UINTVAL c = *u8ptr;
67 if (UTF8_IS_START(c)) {
68 UINTVAL len2 = UTF8SKIP(u8ptr);
69 INTVAL read;
71 if (iter.bytepos + len2 <= s->bufused)
72 goto ok;
74 /* need len - 1 more chars */
75 --len2;
76 s2 = Parrot_str_new_init(interp, NULL, len2,
77 Parrot_utf8_encoding_ptr, 0);
78 s2->bufused = len2;
80 read = Parrot_io_read_buffer(interp, filehandle, &s2);
81 UNUSED(read);
83 s->strlen = iter.charpos;
84 s = Parrot_str_concat(interp, s, s2);
85 *buf = s;
86 len += len2 + 1;
88 /* check last char */
91 ok:
92 Parrot_utf8_encoding_ptr->iter_get_and_advance(interp, *buf, &iter);
94 s->strlen = iter.charpos;
95 return len;
100 =item C<size_t Parrot_io_write_utf8(PARROT_INTERP, PMC *filehandle, const STRING
101 *s)>
103 Write a Parrot string to a filehandle in UTF-8 format.
105 =cut
109 size_t
110 Parrot_io_write_utf8(PARROT_INTERP, ARGMOD(PMC *filehandle), ARGIN(const STRING *s))
112 ASSERT_ARGS(Parrot_io_write_utf8)
113 STRING *dest;
115 if (s->encoding == Parrot_utf8_encoding_ptr)
116 return Parrot_io_write_buffer(interp, filehandle, s);
118 dest = Parrot_utf8_encoding_ptr->to_encoding(interp, s);
119 return Parrot_io_write_buffer(interp, filehandle, dest);
124 =back
126 =head1 SEE ALSO
128 F<src/io/io_passdown.c>,
129 F<src/io/io.c>,
130 F<src/io/io_layers.c>,
131 F<src/io/io_private.h>.
133 =cut
139 * Local variables:
140 * c-file-style: "parrot"
141 * End:
142 * vim: expandtab shiftwidth=4: