2 Copyright (C) 2004-2008, The Perl Foundation.
7 src/encodings/fixed_8.c
11 This file implements the encoding functions for fixed-width 8-bit codepoints
19 #include "parrot/parrot.h"
22 /* HEADERIZER HFILE: src/encodings/fixed_8.h */
24 /* HEADERIZER BEGIN: static */
25 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
27 static void become_encoding(PARROT_INTERP
, SHIM(STRING
*source_string
))
28 __attribute__nonnull__(1);
30 static UINTVAL
bytes(SHIM_INTERP
, ARGIN(STRING
*source_string
))
31 __attribute__nonnull__(2);
33 static UINTVAL
codepoints(PARROT_INTERP
, ARGIN(STRING
*source_string
))
34 __attribute__nonnull__(1)
35 __attribute__nonnull__(2);
37 static UINTVAL
fixed8_get_next(PARROT_INTERP
, ARGMOD(String_iter
*iter
))
38 __attribute__nonnull__(1)
39 __attribute__nonnull__(2)
42 static void fixed8_set_next(PARROT_INTERP
,
43 ARGMOD(String_iter
*iter
),
45 __attribute__nonnull__(1)
46 __attribute__nonnull__(2)
49 static void fixed8_set_position(SHIM_INTERP
,
50 ARGMOD(String_iter
*iter
),
52 __attribute__nonnull__(2)
55 PARROT_WARN_UNUSED_RESULT
56 static UINTVAL
get_byte(PARROT_INTERP
,
57 ARGIN(const STRING
*source_string
),
59 __attribute__nonnull__(1)
60 __attribute__nonnull__(2);
62 PARROT_WARN_UNUSED_RESULT
63 PARROT_CANNOT_RETURN_NULL
64 static STRING
* get_bytes(PARROT_INTERP
,
65 ARGIN(STRING
*source_string
),
68 __attribute__nonnull__(1)
69 __attribute__nonnull__(2);
71 PARROT_WARN_UNUSED_RESULT
72 PARROT_CANNOT_RETURN_NULL
73 static STRING
* get_bytes_inplace(PARROT_INTERP
,
74 ARGIN(STRING
*source_string
),
77 ARGMOD(STRING
*return_string
))
78 __attribute__nonnull__(1)
79 __attribute__nonnull__(2)
80 __attribute__nonnull__(5)
81 FUNC_MODIFIES(*return_string
);
83 PARROT_WARN_UNUSED_RESULT
84 static UINTVAL
get_codepoint(PARROT_INTERP
,
85 ARGIN(const STRING
*source_string
),
87 __attribute__nonnull__(1)
88 __attribute__nonnull__(2);
90 PARROT_WARN_UNUSED_RESULT
91 PARROT_CANNOT_RETURN_NULL
92 static STRING
* get_codepoints(PARROT_INTERP
,
93 ARGIN(STRING
*source_string
),
96 __attribute__nonnull__(1)
97 __attribute__nonnull__(2);
99 PARROT_WARN_UNUSED_RESULT
100 PARROT_CANNOT_RETURN_NULL
101 static STRING
* get_codepoints_inplace(PARROT_INTERP
,
102 ARGIN(STRING
*source_string
),
105 ARGMOD(STRING
*dest_string
))
106 __attribute__nonnull__(1)
107 __attribute__nonnull__(2)
108 __attribute__nonnull__(5)
109 FUNC_MODIFIES(*dest_string
);
111 static void iter_init(SHIM_INTERP
,
112 ARGIN(const STRING
*src
),
113 ARGOUT(String_iter
*iter
))
114 __attribute__nonnull__(2)
115 __attribute__nonnull__(3)
116 FUNC_MODIFIES(*iter
);
118 static void set_byte(PARROT_INTERP
,
119 ARGIN(const STRING
*source_string
),
122 __attribute__nonnull__(1)
123 __attribute__nonnull__(2);
125 static void set_bytes(PARROT_INTERP
,
126 ARGIN(STRING
*source_string
),
129 ARGMOD(STRING
*new_bytes
))
130 __attribute__nonnull__(1)
131 __attribute__nonnull__(2)
132 __attribute__nonnull__(5)
133 FUNC_MODIFIES(*new_bytes
);
135 static void set_codepoint(PARROT_INTERP
,
136 ARGIN(STRING
*source_string
),
139 __attribute__nonnull__(1)
140 __attribute__nonnull__(2);
142 static void set_codepoints(PARROT_INTERP
,
143 ARGIN(STRING
*source_string
),
146 ARGMOD(STRING
*new_codepoints
))
147 __attribute__nonnull__(1)
148 __attribute__nonnull__(2)
149 __attribute__nonnull__(5)
150 FUNC_MODIFIES(*new_codepoints
);
152 PARROT_DOES_NOT_RETURN
153 PARROT_CANNOT_RETURN_NULL
154 static STRING
* to_encoding(PARROT_INTERP
,
157 __attribute__nonnull__(1);
159 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
160 /* HEADERIZER END: static */
/* Calls real_exception() with the UNIMPLEMENTED code and the message
 * "unimpl fixed_8".  The expansion refers to a variable named `interp`,
 * so the macro can only be used where one is in scope. */
#define UNIMPL \
    real_exception(interp, NULL, UNIMPLEMENTED, "unimpl fixed_8")
166 =item C<static STRING * to_encoding>
168 Converts the string C<src> to this particular encoding. If C<dest> is
169 provided, it will contain the result. Otherwise this function operates in
177 PARROT_DOES_NOT_RETURN
178 PARROT_CANNOT_RETURN_NULL
180 to_encoding(PARROT_INTERP
, SHIM(STRING
*src
), SHIM(STRING
*dest
))
188 =item C<static UINTVAL get_codepoint>
190 codepoints are bytes, so delegate
196 PARROT_WARN_UNUSED_RESULT
198 get_codepoint(PARROT_INTERP
, ARGIN(const STRING
*source_string
),
201 return get_byte(interp
, source_string
, offset
);
206 =item C<static void set_codepoint>
208 This is the same as set byte
215 set_codepoint(PARROT_INTERP
, ARGIN(STRING
*source_string
),
216 UINTVAL offset
, UINTVAL codepoint
)
218 set_byte(interp
, source_string
, offset
, codepoint
);
223 =item C<static UINTVAL get_byte>
225 Returns the byte in string C<src> at position C<offset>.
231 PARROT_WARN_UNUSED_RESULT
233 get_byte(PARROT_INTERP
, ARGIN(const STRING
*source_string
), UINTVAL offset
)
235 unsigned char *contents
= (unsigned char *)source_string
->strstart
;
236 if (offset
>= source_string
->bufused
) {
237 /* real_exception(interp, NULL, 0,
238 "get_byte past the end of the buffer (%i of %i)",
239 offset, source_string->bufused);*/
242 return contents
[offset
];
247 =item C<static void set_byte>
249 Sets, in string C<src> at position C<offset>, the byte C<byte>.
256 set_byte(PARROT_INTERP
, ARGIN(const STRING
*source_string
),
257 UINTVAL offset
, UINTVAL byte
)
259 unsigned char *contents
;
260 if (offset
>= source_string
->bufused
) {
261 real_exception(interp
, NULL
, 0, "set_byte past the end of the buffer");
263 contents
= (unsigned char *)source_string
->strstart
;
264 contents
[offset
] = (unsigned char)byte
;
269 =item C<static STRING * get_codepoints>
271 Returns the codepoints in string C<src> at position C<offset> and length
272 C<count>. (Delegates to C<get_bytes>.)
278 PARROT_WARN_UNUSED_RESULT
279 PARROT_CANNOT_RETURN_NULL
281 get_codepoints(PARROT_INTERP
, ARGIN(STRING
*source_string
),
282 UINTVAL offset
, UINTVAL count
)
284 STRING
* const return_string
= get_bytes(interp
, source_string
,
286 return_string
->charset
= source_string
->charset
;
287 return return_string
;
292 =item C<static STRING * get_bytes>
294 Returns the bytes in string C<src> at position C<offset> and length C<count>.
301 PARROT_WARN_UNUSED_RESULT
302 PARROT_CANNOT_RETURN_NULL
304 get_bytes(PARROT_INTERP
, ARGIN(STRING
*source_string
), UINTVAL offset
, UINTVAL count
)
306 STRING
* const return_string
= Parrot_make_COW_reference(interp
,
308 return_string
->encoding
= source_string
->encoding
;
309 return_string
->charset
= source_string
->charset
;
311 return_string
->strstart
= (char *)return_string
->strstart
+ offset
;
312 return_string
->bufused
= count
;
314 return_string
->strlen
= count
;
315 return_string
->hashval
= 0;
317 return return_string
;
323 =item C<static STRING * get_codepoints_inplace>
325 Gets from string C<src> at position C<offset> C<count> codepoints and returns
326 them in C<return_string>. (Delegates to C<get_bytes>.)
332 PARROT_WARN_UNUSED_RESULT
333 PARROT_CANNOT_RETURN_NULL
335 get_codepoints_inplace(PARROT_INTERP
, ARGIN(STRING
*source_string
),
336 UINTVAL offset
, UINTVAL count
, ARGMOD(STRING
*dest_string
))
339 return get_bytes_inplace(interp
, source_string
, offset
,
345 =item C<static STRING * get_bytes_inplace>
347 Gets from string C<src> at position C<offset> C<count> bytes and returns them
354 PARROT_WARN_UNUSED_RESULT
355 PARROT_CANNOT_RETURN_NULL
357 get_bytes_inplace(PARROT_INTERP
, ARGIN(STRING
*source_string
),
358 UINTVAL offset
, UINTVAL count
, ARGMOD(STRING
*return_string
))
360 Parrot_reuse_COW_reference(interp
, source_string
, return_string
);
362 return_string
->strstart
= (char *)return_string
->strstart
+ offset
;
363 return_string
->bufused
= count
;
365 return_string
->strlen
= count
;
366 return_string
->hashval
= 0;
368 return return_string
;
373 =item C<static void set_codepoints>
375 Delegate to set_bytes
382 set_codepoints(PARROT_INTERP
, ARGIN(STRING
*source_string
),
383 UINTVAL offset
, UINTVAL count
, ARGMOD(STRING
*new_codepoints
))
385 set_bytes(interp
, source_string
, offset
, count
, new_codepoints
);
390 =item C<static void set_bytes>
392 Replaces in string C<src> at position C<offset> for C<count> bytes with the
393 contents of string C<new_bytes>.
400 set_bytes(PARROT_INTERP
, ARGIN(STRING
*source_string
),
401 UINTVAL offset
, UINTVAL count
, ARGMOD(STRING
*new_bytes
))
403 string_replace(interp
, source_string
, offset
, count
, new_bytes
, NULL
);
408 =item C<static void become_encoding>
410 Unconditionally makes the string be in this encoding, if that's valid
417 become_encoding(PARROT_INTERP
, SHIM(STRING
*source_string
))
425 =item C<static UINTVAL codepoints>
427 Returns the number of codepoints in string C<src>.
434 codepoints(PARROT_INTERP
, ARGIN(STRING
*source_string
))
436 return bytes(interp
, source_string
);
441 =item C<static UINTVAL bytes>
443 Returns the number of bytes in string C<src>.
450 bytes(SHIM_INTERP
, ARGIN(STRING
*source_string
))
452 return source_string
->bufused
;
461 =item C<static UINTVAL fixed8_get_next>
463 Moves the string iterator C<i> to the next codepoint.
470 fixed8_get_next(PARROT_INTERP
, ARGMOD(String_iter
*iter
))
472 const UINTVAL c
= get_byte(interp
, iter
->str
, iter
->charpos
++);
479 =item C<static void fixed8_set_next>
481 With the string iterator C<i>, appends the codepoint C<c> and advances to the
482 next position in the string.
489 fixed8_set_next(PARROT_INTERP
, ARGMOD(String_iter
*iter
), UINTVAL c
)
491 set_byte(interp
, iter
->str
, iter
->charpos
++, c
);
497 =item C<static void fixed8_set_position>
499 Moves the string iterator C<i> to the position C<n> in the string.
506 fixed8_set_position(SHIM_INTERP
, ARGMOD(String_iter
*iter
), UINTVAL pos
)
508 iter
->bytepos
= iter
->charpos
= pos
;
509 PARROT_ASSERT(pos
<= PObj_buflen(iter
->str
));
515 =item C<static void iter_init>
517 Initializes for string C<src> the string iterator C<iter>.
524 iter_init(SHIM_INTERP
, ARGIN(const STRING
*src
), ARGOUT(String_iter
*iter
))
527 iter
->bytepos
= iter
->charpos
= 0;
528 iter
->get_and_advance
= fixed8_get_next
;
529 iter
->set_and_advance
= fixed8_set_next
;
530 iter
->set_position
= fixed8_set_position
;
535 =item C<ENCODING * Parrot_encoding_fixed_8_init>
537 Initializes the fixed-8 encoding.
543 PARROT_CANNOT_RETURN_NULL
545 Parrot_encoding_fixed_8_init(PARROT_INTERP
)
547 ENCODING
* const return_encoding
= Parrot_new_encoding(interp
);
549 ENCODING base_encoding
= {
551 1, /* Max bytes per codepoint */
558 get_codepoints_inplace
,
569 STRUCT_COPY_FROM_STRUCT(return_encoding
, base_encoding
);
570 Parrot_register_encoding(interp
, "fixed_8", return_encoding
);
571 return return_encoding
;
577 * c-file-style: "parrot"
579 * vim: expandtab shiftwidth=4: