Example: Changed glyph-outline.shape for use with FontConfig.
[shapes.git] / source / charconverters.cc
blob23c6e7d6235066d57d933bec8939adf938ecb18a
/* This file is part of Shapes.
 *
 * Shapes is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * Shapes is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Shapes. If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright 2008, 2010 Henrik Tidefelt
 */
19 #include "strrefdup.h"
20 #include "charconverters.h"
21 #include "shapesexceptions.h"
22 #include "glyphlist.h"
23 #include "characterencoding.h"
24 #include "texttypes.h"
27 #include <iconv.h>
28 #include <string>
29 #include <fstream>
30 #include <sstream>
// Make the names in namespace Shapes usable without qualification in this file.
34 using namespace Shapes;
// Encoding name handed to iconv_open wherever this file opens a converter to or from UCS-4.
36 const char * Helpers::theUCS4EncodingName = "UCS-4-INTERNAL";
38 void
39 iconv_maybe_open( iconv_t * converter, const char * to_encoding, const char * from_encoding )
41 if( *converter == (iconv_t)( -1 ) )
43 *converter = iconv_open( to_encoding, from_encoding );
44 if( *converter == (iconv_t)( -1 ) )
46 std::ostringstream msg;
47 msg << "iconv_open failed to create converter from " << from_encoding << " to " << to_encoding << "." ;
48 throw Exceptions::ExternalError( strrefdup( msg ) );
53 iconv_t
54 Helpers::requireUTF8ToMacRomanConverter( bool cleanup )
56 static iconv_t converter = (iconv_t)( - 1 );
57 if( cleanup )
59 if( converter != (iconv_t)( -1 ) )
61 iconv_close( converter );
62 converter = (iconv_t)( -1 );
65 else
67 iconv_maybe_open( & converter,
68 "Macintosh", // This is meant to be what is called MacRoman in PDF.
69 "UTF-8" );
71 return converter;
74 iconv_t
75 Helpers::requireMacRomanToUTF8Converter( bool cleanup )
77 static iconv_t converter = (iconv_t)( - 1 );
78 if( cleanup )
80 if( converter != (iconv_t)( -1 ) )
82 iconv_close( converter );
83 converter = (iconv_t)( -1 );
86 else
88 iconv_maybe_open( & converter,
89 "UTF-8",
90 "Macintosh" ); // This is meant to be what is called MacRoman in PDF.
92 return converter;
95 iconv_t
96 Helpers::requireUTF8ToASCIIConverter( bool cleanup )
98 static iconv_t converter = (iconv_t)( - 1 );
99 if( cleanup )
101 if( converter != (iconv_t)( -1 ) )
103 iconv_close( converter );
104 converter = (iconv_t)( -1 );
107 else
109 iconv_maybe_open( & converter,
110 "ASCII", // This is used for the names of glyphs in a font
111 "UTF-8" );
113 return converter;
116 iconv_t
117 Helpers::requireUTF8ToUCS4Converter( bool cleanup )
119 static iconv_t converter = (iconv_t)( - 1 );
120 if( cleanup )
122 if( converter != (iconv_t)( -1 ) )
124 iconv_close( converter );
125 converter = (iconv_t)( -1 );
128 else
130 iconv_maybe_open( & converter,
131 Helpers::theUCS4EncodingName,
132 "UTF-8" );
134 return converter;
137 iconv_t
138 Helpers::requireUCS4ToUTF8Converter( bool cleanup )
140 static iconv_t converter = (iconv_t)( - 1 );
141 if( cleanup )
143 if( converter != (iconv_t)( -1 ) )
145 iconv_close( converter );
146 converter = (iconv_t)( -1 );
149 else
151 iconv_maybe_open( & converter,
152 "UTF-8",
153 Helpers::theUCS4EncodingName );
155 return converter;
158 iconv_t
159 Helpers::requireUCS4ToMacRomanConverter( bool cleanup )
161 static iconv_t converter = (iconv_t)( - 1 );
162 if( cleanup )
164 if( converter != (iconv_t)( -1 ) )
166 iconv_close( converter );
167 converter = (iconv_t)( -1 );
170 else
172 iconv_maybe_open( & converter,
173 "MacRoman",
174 Helpers::theUCS4EncodingName );
176 return converter;
179 iconv_t
180 Helpers::requireUTF16BEToUCS4Converter( bool cleanup )
182 static iconv_t converter = (iconv_t)( - 1 );
183 if( cleanup )
185 if( converter != (iconv_t)( -1 ) )
187 iconv_close( converter );
188 converter = (iconv_t)( -1 );
191 else
193 iconv_maybe_open( & converter,
194 Helpers::theUCS4EncodingName,
195 "UTF-16BE" );
197 return converter;
200 iconv_t
201 Helpers::requireUCS4ToUTF16BEConverter( bool cleanup )
203 static iconv_t converter = (iconv_t)( - 1 );
204 if( cleanup )
206 if( converter != (iconv_t)( -1 ) )
208 iconv_close( converter );
209 converter = (iconv_t)( -1 );
212 else
214 iconv_maybe_open( & converter,
215 "UTF-16BE",
216 Helpers::theUCS4EncodingName );
218 return converter;
221 iconv_t
222 Helpers::requireUTF8ToWinANSIConverter( bool cleanup )
224 static iconv_t converter = (iconv_t)( - 1 );
225 if( cleanup )
227 if( converter != (iconv_t)( -1 ) )
229 iconv_close( converter );
230 converter = (iconv_t)( -1 );
233 else
235 iconv_maybe_open( & converter,
236 "LATIN1", // This is meant to be what is called WinANSI in PDF.
237 "UTF-8" );
239 return converter;
242 iconv_t
243 Helpers::requireUTF8ToUTF16BEConverter( bool cleanup )
245 static iconv_t converter = (iconv_t)( - 1 );
246 if( cleanup )
248 if( converter != (iconv_t)( -1 ) )
250 iconv_close( converter );
251 converter = (iconv_t)( -1 );
254 else
256 iconv_maybe_open( & converter,
257 "UTF-16BE",
258 "UTF-8" );
260 return converter;
263 const FontMetrics::GlyphList &
264 Helpers::requireGlyphList( bool cleanup )
266 static const FontMetrics::GlyphList * converter = 0;
267 if( cleanup )
269 if( converter != 0 )
271 delete converter;
272 converter = 0;
275 else
277 if( converter == 0 )
279 std::string filename = Lang::Font::searchGlyphList( );
280 std::ifstream iFile( filename.c_str( ) );
281 if( ! iFile.is_open( ) )
283 std::ostringstream oss;
284 oss << "Could locate, but not open the glyph list " << filename ;
285 throw Exceptions::ExternalError( strrefdup( oss ) );
289 converter = new FontMetrics::GlyphList( iFile );
291 catch( const char * ball )
293 std::ostringstream oss;
294 oss << "Parsing the glyph list " << filename << " resulted in the error: " << ball ;
295 throw Exceptions::ExternalError( strrefdup( oss ) );
297 catch( const std::string ball )
299 std::ostringstream oss;
300 oss << "Parsing the glyph list " << filename << " resulted in the error: " << ball ;
301 throw Exceptions::ExternalError( strrefdup( oss ) );
303 catch( const Shapes::Exceptions::Exception & ball )
305 std::cerr << "Parsing the glyph list " << filename << " resulted an error. Rethrowing." << std::endl ;
306 throw;
308 catch( ... )
310 throw Exceptions::InternalError( "An unrecognized exception was caught from glyph list parsing." );
314 return *converter;
317 const FontMetrics::CharacterEncoding &
318 Helpers::requireMacRomanEncoding( bool cleanup )
320 static const FontMetrics::CharacterEncoding * converter = 0;
321 if( cleanup )
323 if( converter != 0 )
325 delete converter;
326 converter = 0;
329 else
331 if( converter == 0 )
333 std::string filename = Lang::Font::searchCharacterEncoding( "MacRoman" );
334 std::ifstream iFile( filename.c_str( ) );
335 if( ! iFile.is_open( ) )
337 std::ostringstream oss;
338 oss << "Could locate, but not open the character encoding " << filename ;
339 throw Exceptions::ExternalError( strrefdup( oss ) );
343 converter = new FontMetrics::CharacterEncoding( iFile );
345 catch( const char * ball )
347 std::ostringstream oss;
348 oss << "Parsing the character encoding " << filename << " resulted in the error: " << ball ;
349 throw Exceptions::ExternalError( strrefdup( oss ) );
351 catch( const std::string ball )
353 std::ostringstream oss;
354 oss << "Parsing the character encoding " << filename << " resulted in the error: " << ball ;
355 throw Exceptions::ExternalError( strrefdup( oss ) );
357 catch( const Shapes::Exceptions::Exception & ball )
359 std::cerr << "Parsing the character encoding " << filename << " resulted an error. Rethrowing." << std::endl ;
360 throw;
362 catch( ... )
364 throw Exceptions::InternalError( "An unrecognized exception was caught from character encoding parsing." );
368 return *converter;
371 unsigned char
372 Kernel::UnicodeCodePoint::get_MacRoman( ) const
374 static iconv_t converter = Helpers::requireUCS4ToMacRomanConverter( );
376 const size_t BUF_SIZE = 1;
377 char buf[ BUF_SIZE ];
378 char * dst = buf;
379 size_t outbytesleft = BUF_SIZE;
381 const char * src = reinterpret_cast< const char * >( & value_ );
382 size_t inbytesleft = sizeof( value_ );
384 size_t count = iconv( converter,
385 ICONV_CAST( & src ), & inbytesleft,
386 & dst, & outbytesleft );
387 if( count == (size_t)(-1) )
389 if( errno == EILSEQ )
391 std::cerr << "Unicode: " << std::hex << value_ << std::endl ;
392 throw Exceptions::MiscellaneousRequirement( "The UCS-4 code point cannot be represented in MacRoman encodig." );
394 else if( errno == EINVAL )
396 throw Exceptions::InternalError( "Malformed UCS-4 value (in conversion to MacRoman)." );
398 else if( errno == E2BIG )
400 throw Exceptions::InternalError( "The MacRoman destination buffer was too small when encoding a single UCS-4 code point." );
402 else
404 std::ostringstream msg;
405 msg << "iconv failed with an unrecognized error code: " << errno ;
406 throw Exceptions::InternalError( strrefdup( msg ) );
409 else if( inbytesleft != 0 )
411 throw Exceptions::InternalError( "Failed to use the entire UCS-4 code point when converting to MacRoman." );
413 return *reinterpret_cast< unsigned char * >( buf );
416 void
417 Kernel::UnicodeCodePoint::decode_UTF8( const char ** src, size_t * src_avail )
419 static iconv_t converter = Helpers::requireUTF8ToUCS4Converter( );
421 char * dst = reinterpret_cast< char * >( & value_ );
422 size_t outbytesleft = sizeof( value_ );
424 size_t tmp_src_avail;
425 if( **src > 0 )
427 tmp_src_avail = 1;
429 else
431 switch( 0xF0 & **src )
433 case 0xE0: tmp_src_avail = 3; break;
434 case 0xF0: tmp_src_avail = 4; break;
435 default: tmp_src_avail = 2; break;
438 if( tmp_src_avail > *src_avail )
440 throw Exceptions::InternalError( "The UTF-8 source did not contain a complete character when initializing a single UCS-4 code point." );
442 *src_avail -= tmp_src_avail;
444 size_t count = iconv( converter,
445 ICONV_CAST( src ), & tmp_src_avail,
446 & dst, & outbytesleft );
447 if( count == (size_t)(-1) )
449 if( errno == EILSEQ )
451 throw Exceptions::InternalError( "Failed to initialize UCS-4 code point from UTF-8 data." );
453 else if( errno == EINVAL )
455 throw Exceptions::MiscellaneousRequirement( "Malformed UTF-8 value in initialization of UCS-4 code point." );
457 else if( errno == E2BIG )
459 throw Exceptions::InternalError( "The UTF-8 source buffer contained more than one character when initializing a single UCS-4 code point." );
461 else
463 std::ostringstream msg;
464 msg << "iconv failed with an unrecognized error code: " << errno ;
465 throw Exceptions::InternalError( strrefdup( msg ) );
468 else if( outbytesleft != 0 )
470 throw Exceptions::InternalError( "Failed to initialize the entire UCS-4 code point when converting from UTF-8." );
474 void
475 Kernel::UnicodeCodePoint::decode_UTF8( const char * src )
477 static iconv_t converter = Helpers::requireUTF8ToUCS4Converter( );
479 char * dst = reinterpret_cast< char * >( & value_ );
480 size_t outbytesleft = sizeof( value_ );
482 size_t tmp_src_avail;
483 if( *src > 0 )
485 tmp_src_avail = 1;
487 else
489 switch( 0xF0 & *src )
491 case 0xE0: tmp_src_avail = 3; break;
492 case 0xF0: tmp_src_avail = 4; break;
493 default: tmp_src_avail = 2; break;
497 size_t count = iconv( converter,
498 ICONV_CAST( & src ), & tmp_src_avail,
499 & dst, & outbytesleft );
500 if( count == (size_t)(-1) )
502 if( errno == EILSEQ )
504 throw Exceptions::InternalError( "Failed to initialize UCS-4 code point from UTF-8 data." );
506 else if( errno == EINVAL )
508 throw Exceptions::MiscellaneousRequirement( "Malformed UTF-8 value in initialization of UCS-4 code point." );
510 else if( errno == E2BIG )
512 throw Exceptions::InternalError( "The UTF-8 source buffer contained more than one character when initializing a single UCS-4 code point." );
514 else
516 std::ostringstream msg;
517 msg << "iconv failed with an unrecognized error code: " << errno ;
518 throw Exceptions::InternalError( strrefdup( msg ) );
521 else if( outbytesleft != 0 )
523 throw Exceptions::InternalError( "Failed to initialize the entire UCS-4 code point when converting from UTF-8." );
527 void
528 Kernel::UnicodeCodePoint::decode_UCS4( const char ** src, size_t * src_avail )
530 if( *src_avail < 4 )
532 throw Exceptions::InternalError( "Not enough data available when initializing UCS-4 code point (needs four bytes)." );
534 memcpy( reinterpret_cast< char * >( & value_ ), *src, 4 );
535 *src += 4;
536 *src_avail -= 4;
539 void
540 Kernel::UnicodeCodePoint::decode_UCS4( const char * src )
542 memcpy( reinterpret_cast< char * >( & value_ ), src, 4 );
545 void
546 Kernel::UnicodeCodePoint::encode_UTF8( char ** dst, size_t * dst_avail ) const
548 static iconv_t converter = Helpers::requireUCS4ToUTF8Converter( );
550 const char * src = reinterpret_cast< const char * >( & value_ );
551 size_t inbytesleft = sizeof( value_ );
553 size_t count = iconv( converter,
554 ICONV_CAST( & src ), & inbytesleft,
555 dst, dst_avail );
556 if( count == (size_t)(-1) )
558 if( errno == EILSEQ )
560 throw Exceptions::InternalError( "Failed to convert UCS-4 code point to UTF-8." );
562 else if( errno == EINVAL )
564 throw Exceptions::InternalError( "Malformed UCS-4 value (in conversion to UTF-8)." );
566 else if( errno == E2BIG )
568 throw Exceptions::InternalError( "The UTF-8 destination buffer was too small when encoding a single UCS-4 code point." );
570 else
572 std::ostringstream msg;
573 msg << "iconv failed with an unrecognized error code: " << errno ;
574 throw Exceptions::InternalError( strrefdup( msg ) );
577 else if( inbytesleft != 0 )
579 throw Exceptions::InternalError( "Failed to use the entire UCS-4 code point when converting to UTF-8." );
583 void
584 Kernel::UnicodeCodePoint::encode_UTF16BE( char ** dst, size_t * dst_avail ) const
586 static iconv_t converter = Helpers::requireUCS4ToUTF16BEConverter( );
588 const char * src = reinterpret_cast< const char * >( & value_ );
589 size_t inbytesleft = sizeof( value_ );
591 size_t count = iconv( converter,
592 ICONV_CAST( & src ), & inbytesleft,
593 dst, dst_avail );
594 if( count == (size_t)(-1) )
596 if( errno == EILSEQ )
598 throw Exceptions::InternalError( "Failed to convert UCS-4 code point to UTF-16-BE." );
600 else if( errno == EINVAL )
602 throw Exceptions::InternalError( "Malformed UCS-4 value (in conversion to UTF-16-BE)." );
604 else if( errno == E2BIG )
606 throw Exceptions::InternalError( "The UTF-16-BE destination buffer was too small when encoding a single UCS-4 code point." );
608 else
610 std::ostringstream msg;
611 msg << "iconv failed with an unrecognized error code: " << errno ;
612 throw Exceptions::InternalError( strrefdup( msg ) );
615 else if( inbytesleft != 0 )
617 throw Exceptions::InternalError( "Failed to use the entire UCS-4 code point when converting to UTF-16-BE." );
621 void
622 Kernel::UnicodeCodePoint::decode_glyph_name( const char * name )
624 static const FontMetrics::GlyphList & glyphList = Helpers::requireGlyphList( );
625 if( ! glyphList.name_to_UCS4( name, & value_ ) )
627 std::ostringstream msg;
628 msg << "The glyph name \"" << name << "\" is not in the glyph list, and cannot be converted to a UCS-4 code point." ;
629 throw Exceptions::InternalError( strrefdup( msg ) );
// Definitions of the static UnicodeCodePoint members SPACE and NEWLINE.
// They are constructed from 32 and 10, the character codes of ' ' and '\n'
// (assuming the constructor argument is the code point value -- TODO confirm).
633 Kernel::UnicodeCodePoint Kernel::UnicodeCodePoint::SPACE( 32 );
634 Kernel::UnicodeCodePoint Kernel::UnicodeCodePoint::NEWLINE( 10 );