1 /* This file is part of Shapes.
3 * Shapes is free software: you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation, either version 3 of the License, or
8 * Shapes is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with Shapes. If not, see <http://www.gnu.org/licenses/>.
16 * Copyright 2008, 2010 Henrik Tidefelt
19 #include "strrefdup.h"
20 #include "charconverters.h"
21 #include "shapesexceptions.h"
22 #include "glyphlist.h"
23 #include "characterencoding.h"
24 #include "texttypes.h"
34 using namespace Shapes
;
// Encoding name that this file hands to iconv_maybe_open for the UCS-4 side of
// every UCS-4 converter it sets up.
// NOTE(review): presumably the host-byte-order UCS-4 variant of libiconv --
// confirm that the iconv implementation in use accepts this name.
36 const char * Helpers::theUCS4EncodingName
= "UCS-4-INTERNAL";
// Opens an iconv conversion descriptor on demand.
//   converter      in/out slot for the descriptor; (iconv_t)(-1) is the
//                  "not opened" sentinel that is tested here.
//   to_encoding    target encoding name passed to iconv_open.
//   from_encoding  source encoding name passed to iconv_open.
// When the slot holds the sentinel, iconv_open( to_encoding, from_encoding )
// is called and its result is stored back through the pointer.
// Throws Exceptions::ExternalError, naming both encodings, when the slot is
// still the sentinel afterwards.
// NOTE(review): the return-type line and the brace-only lines are missing
// from this listing, so the exact nesting of the two tests cannot be seen.
39 iconv_maybe_open( iconv_t
* converter
, const char * to_encoding
, const char * from_encoding
)
41 if( *converter
== (iconv_t
)( -1 ) )
43 *converter
= iconv_open( to_encoding
, from_encoding
);
// iconv_open reports failure by handing back the same sentinel value.
44 if( *converter
== (iconv_t
)( -1 ) )
46 std::ostringstream msg
;
47 msg
<< "iconv_open failed to create converter from " << from_encoding
<< " to " << to_encoding
<< "." ;
48 throw Exceptions::ExternalError( strrefdup( msg
) );
// Accessor for an iconv descriptor kept in a function-local static that starts
// out as the "not opened" sentinel (iconv_t)(-1).
//   cleanup  flag parameter; the statement that tests it is not part of this
//            listing.
// Visible behaviour: a descriptor that is not the sentinel is passed to
// iconv_close and reset to the sentinel, and iconv_maybe_open is called on the
// static with "Macintosh" as the target encoding.
// NOTE(review): the return-type line, the brace-only lines, the return
// statements and the last iconv_maybe_open argument are missing here.
// Presumably the close/reset runs only when cleanup is true and the source
// encoding is UTF-8 (going by the function name) -- confirm against the file.
54 Helpers::requireUTF8ToMacRomanConverter( bool cleanup
)
56 static iconv_t converter
= (iconv_t
)( - 1 );
59 if( converter
!= (iconv_t
)( -1 ) )
61 iconv_close( converter
);
// Back to the sentinel so that iconv_maybe_open can reopen the descriptor.
62 converter
= (iconv_t
)( -1 );
67 iconv_maybe_open( & converter
,
68 "Macintosh", // This is meant to be what is called MacRoman in PDF.
// Accessor for an iconv descriptor kept in a function-local static that starts
// out as the "not opened" sentinel (iconv_t)(-1).
//   cleanup  flag parameter; the statement that tests it is not part of this
//            listing.
// Visible behaviour: a descriptor that is not the sentinel is passed to
// iconv_close and reset to the sentinel, and iconv_maybe_open is called on the
// static with "Macintosh" as the source encoding (its last argument).
// NOTE(review): the return-type line, the brace-only lines, the return
// statements and the target-encoding argument are missing here. Presumably
// the close/reset runs only when cleanup is true and the target encoding is
// UTF-8 (going by the function name) -- confirm against the file.
75 Helpers::requireMacRomanToUTF8Converter( bool cleanup
)
77 static iconv_t converter
= (iconv_t
)( - 1 );
80 if( converter
!= (iconv_t
)( -1 ) )
82 iconv_close( converter
);
// Back to the sentinel so that iconv_maybe_open can reopen the descriptor.
83 converter
= (iconv_t
)( -1 );
88 iconv_maybe_open( & converter
,
90 "Macintosh" ); // This is meant to be what is called MacRoman in PDF.
// Accessor for an iconv descriptor kept in a function-local static that starts
// out as the "not opened" sentinel (iconv_t)(-1).
//   cleanup  flag parameter; the statement that tests it is not part of this
//            listing.
// Visible behaviour: a descriptor that is not the sentinel is passed to
// iconv_close and reset to the sentinel, and iconv_maybe_open is called on the
// static with "ASCII" as the target encoding.
// NOTE(review): the return-type line, the brace-only lines, the return
// statements and the last iconv_maybe_open argument are missing here.
// Presumably the close/reset runs only when cleanup is true and the source
// encoding is UTF-8 (going by the function name) -- confirm against the file.
96 Helpers::requireUTF8ToASCIIConverter( bool cleanup
)
98 static iconv_t converter
= (iconv_t
)( - 1 );
101 if( converter
!= (iconv_t
)( -1 ) )
103 iconv_close( converter
);
// Back to the sentinel so that iconv_maybe_open can reopen the descriptor.
104 converter
= (iconv_t
)( -1 );
109 iconv_maybe_open( & converter
,
110 "ASCII", // This is used for the names of glyphs in a font
// Accessor for an iconv descriptor kept in a function-local static that starts
// out as the "not opened" sentinel (iconv_t)(-1).
//   cleanup  flag parameter; the statement that tests it is not part of this
//            listing.
// Visible behaviour: a descriptor that is not the sentinel is passed to
// iconv_close and reset to the sentinel, and iconv_maybe_open is called on the
// static with Helpers::theUCS4EncodingName as the target encoding.
// NOTE(review): the return-type line, the brace-only lines, the return
// statements and the last iconv_maybe_open argument are missing here.
// Presumably the close/reset runs only when cleanup is true and the source
// encoding is UTF-8 (going by the function name) -- confirm against the file.
117 Helpers::requireUTF8ToUCS4Converter( bool cleanup
)
119 static iconv_t converter
= (iconv_t
)( - 1 );
122 if( converter
!= (iconv_t
)( -1 ) )
124 iconv_close( converter
);
// Back to the sentinel so that iconv_maybe_open can reopen the descriptor.
125 converter
= (iconv_t
)( -1 );
130 iconv_maybe_open( & converter
,
131 Helpers::theUCS4EncodingName
,
// Accessor for an iconv descriptor kept in a function-local static that starts
// out as the "not opened" sentinel (iconv_t)(-1).
//   cleanup  flag parameter; the statement that tests it is not part of this
//            listing.
// Visible behaviour: a descriptor that is not the sentinel is passed to
// iconv_close and reset to the sentinel, and iconv_maybe_open is called on the
// static with Helpers::theUCS4EncodingName as the source encoding (its last
// argument).
// NOTE(review): the return-type line, the brace-only lines, the return
// statements and the target-encoding argument are missing here. Presumably
// the close/reset runs only when cleanup is true and the target encoding is
// UTF-8 (going by the function name) -- confirm against the file.
138 Helpers::requireUCS4ToUTF8Converter( bool cleanup
)
140 static iconv_t converter
= (iconv_t
)( - 1 );
143 if( converter
!= (iconv_t
)( -1 ) )
145 iconv_close( converter
);
// Back to the sentinel so that iconv_maybe_open can reopen the descriptor.
146 converter
= (iconv_t
)( -1 );
151 iconv_maybe_open( & converter
,
153 Helpers::theUCS4EncodingName
);
// Accessor for an iconv descriptor kept in a function-local static that starts
// out as the "not opened" sentinel (iconv_t)(-1).
//   cleanup  flag parameter; the statement that tests it is not part of this
//            listing.
// Visible behaviour: a descriptor that is not the sentinel is passed to
// iconv_close and reset to the sentinel, and iconv_maybe_open is called on the
// static with Helpers::theUCS4EncodingName as the source encoding (its last
// argument).
// NOTE(review): the return-type line, the brace-only lines, the return
// statements and the target-encoding argument are missing here. Presumably
// the close/reset runs only when cleanup is true and the target is the same
// MacRoman name used by the sibling helpers -- confirm against the file.
159 Helpers::requireUCS4ToMacRomanConverter( bool cleanup
)
161 static iconv_t converter
= (iconv_t
)( - 1 );
164 if( converter
!= (iconv_t
)( -1 ) )
166 iconv_close( converter
);
// Back to the sentinel so that iconv_maybe_open can reopen the descriptor.
167 converter
= (iconv_t
)( -1 );
172 iconv_maybe_open( & converter
,
174 Helpers::theUCS4EncodingName
);
// Accessor for an iconv descriptor kept in a function-local static that starts
// out as the "not opened" sentinel (iconv_t)(-1).
//   cleanup  flag parameter; the statement that tests it is not part of this
//            listing.
// Visible behaviour: a descriptor that is not the sentinel is passed to
// iconv_close and reset to the sentinel, and iconv_maybe_open is called on the
// static with Helpers::theUCS4EncodingName as the target encoding.
// NOTE(review): the return-type line, the brace-only lines, the return
// statements and the source-encoding argument are missing here. Presumably
// the close/reset runs only when cleanup is true and the source is big-endian
// UTF-16 (going by the function name) -- confirm against the file.
180 Helpers::requireUTF16BEToUCS4Converter( bool cleanup
)
182 static iconv_t converter
= (iconv_t
)( - 1 );
185 if( converter
!= (iconv_t
)( -1 ) )
187 iconv_close( converter
);
// Back to the sentinel so that iconv_maybe_open can reopen the descriptor.
188 converter
= (iconv_t
)( -1 );
193 iconv_maybe_open( & converter
,
194 Helpers::theUCS4EncodingName
,
// Accessor for an iconv descriptor kept in a function-local static that starts
// out as the "not opened" sentinel (iconv_t)(-1).
//   cleanup  flag parameter; the statement that tests it is not part of this
//            listing.
// Visible behaviour: a descriptor that is not the sentinel is passed to
// iconv_close and reset to the sentinel, and iconv_maybe_open is called on the
// static with Helpers::theUCS4EncodingName as the source encoding (its last
// argument).
// NOTE(review): the return-type line, the brace-only lines, the return
// statements and the target-encoding argument are missing here. Presumably
// the close/reset runs only when cleanup is true and the target is big-endian
// UTF-16 (going by the function name) -- confirm against the file.
201 Helpers::requireUCS4ToUTF16BEConverter( bool cleanup
)
203 static iconv_t converter
= (iconv_t
)( - 1 );
206 if( converter
!= (iconv_t
)( -1 ) )
208 iconv_close( converter
);
// Back to the sentinel so that iconv_maybe_open can reopen the descriptor.
209 converter
= (iconv_t
)( -1 );
214 iconv_maybe_open( & converter
,
216 Helpers::theUCS4EncodingName
);
// Accessor for an iconv descriptor kept in a function-local static that starts
// out as the "not opened" sentinel (iconv_t)(-1).
//   cleanup  flag parameter; the statement that tests it is not part of this
//            listing.
// Visible behaviour: a descriptor that is not the sentinel is passed to
// iconv_close and reset to the sentinel, and iconv_maybe_open is called on the
// static with "LATIN1" as the target encoding.
// NOTE(review): the return-type line, the brace-only lines, the return
// statements and the last iconv_maybe_open argument are missing here.
// Presumably the close/reset runs only when cleanup is true and the source
// encoding is UTF-8 (going by the function name) -- confirm against the file.
// NOTE(review): PDF's WinAnsiEncoding is based on Windows code page 1252,
// which assigns printable characters in 0x80-0x9F where ISO 8859-1 does not;
// check whether "LATIN1" is a close enough stand-in for the intended use.
222 Helpers::requireUTF8ToWinANSIConverter( bool cleanup
)
224 static iconv_t converter
= (iconv_t
)( - 1 );
227 if( converter
!= (iconv_t
)( -1 ) )
229 iconv_close( converter
);
// Back to the sentinel so that iconv_maybe_open can reopen the descriptor.
230 converter
= (iconv_t
)( -1 );
235 iconv_maybe_open( & converter
,
236 "LATIN1", // This is meant to be what is called WinANSI in PDF.
// Accessor for an iconv descriptor kept in a function-local static that starts
// out as the "not opened" sentinel (iconv_t)(-1).
//   cleanup  flag parameter; the statement that tests it is not part of this
//            listing.
// Visible behaviour: a descriptor that is not the sentinel is passed to
// iconv_close and reset to the sentinel, and iconv_maybe_open is called on the
// static.
// NOTE(review): the return-type line, the brace-only lines, the return
// statements and BOTH encoding arguments of iconv_maybe_open are missing
// here. Presumably the close/reset runs only when cleanup is true and the
// conversion is UTF-8 to big-endian UTF-16 (going by the function name) --
// confirm against the file.
243 Helpers::requireUTF8ToUTF16BEConverter( bool cleanup
)
245 static iconv_t converter
= (iconv_t
)( - 1 );
248 if( converter
!= (iconv_t
)( -1 ) )
250 iconv_close( converter
);
// Back to the sentinel so that iconv_maybe_open can reopen the descriptor.
251 converter
= (iconv_t
)( -1 );
256 iconv_maybe_open( & converter
,
// Accessor for a FontMetrics::GlyphList held through a function-local static
// pointer that starts out null; returns a const reference to a GlyphList.
//   cleanup  flag parameter; the statements that test it (listing lines
//            267-278) are not part of this listing.
// Visible loading steps: the file name comes from Lang::Font::searchGlyphList,
// the file is opened as a std::ifstream, and a GlyphList is constructed from
// that stream with new.
// Throws Exceptions::ExternalError when the file cannot be opened, or when
// parsing throws a const char * or a std::string (the text is forwarded in the
// message). Throws Exceptions::InternalError with a fixed message from a spot
// whose enclosing handler is not visible here (presumably a catch-all).
// NOTE(review): brace-only lines, the try keyword, the rethrow after the
// std::cerr message and the return statements are missing from this listing.
263 const FontMetrics::GlyphList
&
264 Helpers::requireGlyphList( bool cleanup
)
266 static const FontMetrics::GlyphList
* converter
= 0;
279 std::string filename
= Lang::Font::searchGlyphList( );
280 std::ifstream
iFile( filename
.c_str( ) );
281 if( ! iFile
.is_open( ) )
283 std::ostringstream oss
;
284 oss
<< "Could locate, but not open the glyph list " << filename
;
285 throw Exceptions::ExternalError( strrefdup( oss
) );
289 converter
= new FontMetrics::GlyphList( iFile
);
// Parser errors thrown as C strings are wrapped in an ExternalError.
291 catch( const char * ball
)
293 std::ostringstream oss
;
294 oss
<< "Parsing the glyph list " << filename
<< " resulted in the error: " << ball
;
295 throw Exceptions::ExternalError( strrefdup( oss
) );
// NOTE(review): the std::string is caught by value, which copies it; a
// const reference would avoid the copy.
297 catch( const std::string ball
)
299 std::ostringstream oss
;
300 oss
<< "Parsing the glyph list " << filename
<< " resulted in the error: " << ball
;
301 throw Exceptions::ExternalError( strrefdup( oss
) );
// Shapes' own exceptions are only announced on std::cerr in the visible code.
303 catch( const Shapes::Exceptions::Exception
& ball
)
305 std::cerr
<< "Parsing the glyph list " << filename
<< " resulted an error. Rethrowing." << std::endl
;
310 throw Exceptions::InternalError( "An unrecognized exception was caught from glyph list parsing." );
// Accessor for a FontMetrics::CharacterEncoding held through a function-local
// static pointer that starts out null; returns a const reference to a
// CharacterEncoding.
//   cleanup  flag parameter; the statements that test it (listing lines
//            321-332) are not part of this listing.
// Visible loading steps: the file name comes from
// Lang::Font::searchCharacterEncoding( "MacRoman" ), the file is opened as a
// std::ifstream, and a CharacterEncoding is constructed from that stream with
// new.
// Throws Exceptions::ExternalError when the file cannot be opened, or when
// parsing throws a const char * or a std::string (the text is forwarded in the
// message). Throws Exceptions::InternalError with a fixed message from a spot
// whose enclosing handler is not visible here (presumably a catch-all).
// NOTE(review): brace-only lines, the try keyword, the rethrow after the
// std::cerr message and the return statements are missing from this listing.
317 const FontMetrics::CharacterEncoding
&
318 Helpers::requireMacRomanEncoding( bool cleanup
)
320 static const FontMetrics::CharacterEncoding
* converter
= 0;
333 std::string filename
= Lang::Font::searchCharacterEncoding( "MacRoman" );
334 std::ifstream
iFile( filename
.c_str( ) );
335 if( ! iFile
.is_open( ) )
337 std::ostringstream oss
;
338 oss
<< "Could locate, but not open the character encoding " << filename
;
339 throw Exceptions::ExternalError( strrefdup( oss
) );
343 converter
= new FontMetrics::CharacterEncoding( iFile
);
// Parser errors thrown as C strings are wrapped in an ExternalError.
345 catch( const char * ball
)
347 std::ostringstream oss
;
348 oss
<< "Parsing the character encoding " << filename
<< " resulted in the error: " << ball
;
349 throw Exceptions::ExternalError( strrefdup( oss
) );
// NOTE(review): the std::string is caught by value, which copies it; a
// const reference would avoid the copy.
351 catch( const std::string ball
)
353 std::ostringstream oss
;
354 oss
<< "Parsing the character encoding " << filename
<< " resulted in the error: " << ball
;
355 throw Exceptions::ExternalError( strrefdup( oss
) );
// Shapes' own exceptions are only announced on std::cerr in the visible code.
357 catch( const Shapes::Exceptions::Exception
& ball
)
359 std::cerr
<< "Parsing the character encoding " << filename
<< " resulted an error. Rethrowing." << std::endl
;
364 throw Exceptions::InternalError( "An unrecognized exception was caught from character encoding parsing." );
// Converts this code point (value_) to a single byte through the converter
// obtained from Helpers::requireUCS4ToMacRomanConverter, and returns that byte
// read as unsigned char.
// The bytes of value_ are the iconv input; the output is a one-byte buffer.
// Throws Exceptions::MiscellaneousRequirement on EILSEQ (after printing the
// code point in hex to std::cerr). Throws Exceptions::InternalError on EINVAL,
// on E2BIG, on any other errno value, and when iconv succeeds but leaves
// input bytes unconsumed.
// NOTE(review): the return-type line, the brace-only lines and the
// declaration of dst (listing line 378) are missing from this listing.
// NOTE(review): the descriptor is copied into a function-local static on the
// first call; if the helper later closes and reopens its own descriptor, this
// copy is not refreshed -- verify that this cannot happen while in use.
// NOTE(review): the EILSEQ message below spells "encodig"; it is a runtime
// string, so it is left untouched here.
372 Kernel::UnicodeCodePoint::get_MacRoman( ) const
374 static iconv_t converter
= Helpers::requireUCS4ToMacRomanConverter( );
// One output byte is all that this function makes room for.
376 const size_t BUF_SIZE
= 1;
377 char buf
[ BUF_SIZE
];
379 size_t outbytesleft
= BUF_SIZE
;
381 const char * src
= reinterpret_cast< const char * >( & value_
);
382 size_t inbytesleft
= sizeof( value_
);
384 size_t count
= iconv( converter
,
385 ICONV_CAST( & src
), & inbytesleft
,
386 & dst
, & outbytesleft
);
// (size_t)(-1) is the failure value tested for; errno selects the message.
387 if( count
== (size_t)(-1) )
389 if( errno
== EILSEQ
)
391 std::cerr
<< "Unicode: " << std::hex
<< value_
<< std::endl
;
392 throw Exceptions::MiscellaneousRequirement( "The UCS-4 code point cannot be represented in MacRoman encodig." );
394 else if( errno
== EINVAL
)
396 throw Exceptions::InternalError( "Malformed UCS-4 value (in conversion to MacRoman)." );
398 else if( errno
== E2BIG
)
400 throw Exceptions::InternalError( "The MacRoman destination buffer was too small when encoding a single UCS-4 code point." );
404 std::ostringstream msg
;
405 msg
<< "iconv failed with an unrecognized error code: " << errno
;
406 throw Exceptions::InternalError( strrefdup( msg
) );
409 else if( inbytesleft
!= 0 )
411 throw Exceptions::InternalError( "Failed to use the entire UCS-4 code point when converting to MacRoman." );
413 return *reinterpret_cast< unsigned char * >( buf
);
// Reads one UTF-8 encoded character from a caller-owned cursor and stores it
// in value_, using the converter from Helpers::requireUTF8ToUCS4Converter.
//   src        address of the read pointer; it is handed to iconv as the
//              input-pointer argument.
//   src_avail  number of bytes available at *src; reduced here by the byte
//              length chosen for the character.
// The byte length is chosen from the high nibble of the first byte: 0xE0
// gives 3, 0xF0 gives 4, anything else reaching the switch gives 2.
// Throws Exceptions::InternalError when fewer bytes are available than the
// chosen length, on EILSEQ, on E2BIG, on any other errno value, and when iconv
// succeeds without filling all of value_. Throws
// Exceptions::MiscellaneousRequirement on EINVAL.
// NOTE(review): the return-type line, the brace-only lines and listing lines
// 425-430 (between the declaration of tmp_src_avail and the switch) are
// missing; other lead bytes may be handled there -- confirm.
417 Kernel::UnicodeCodePoint::decode_UTF8( const char ** src
, size_t * src_avail
)
419 static iconv_t converter
= Helpers::requireUTF8ToUCS4Converter( );
// iconv writes its output straight into the storage of value_.
421 char * dst
= reinterpret_cast< char * >( & value_
);
422 size_t outbytesleft
= sizeof( value_
);
424 size_t tmp_src_avail
;
431 switch( 0xF0 & **src
)
433 case 0xE0: tmp_src_avail
= 3; break;
434 case 0xF0: tmp_src_avail
= 4; break;
435 default: tmp_src_avail
= 2; break;
438 if( tmp_src_avail
> *src_avail
)
440 throw Exceptions::InternalError( "The UTF-8 source did not contain a complete character when initializing a single UCS-4 code point." );
// The caller's count is reduced before the conversion is attempted.
442 *src_avail
-= tmp_src_avail
;
444 size_t count
= iconv( converter
,
445 ICONV_CAST( src
), & tmp_src_avail
,
446 & dst
, & outbytesleft
);
// (size_t)(-1) is the failure value tested for; errno selects the message.
447 if( count
== (size_t)(-1) )
449 if( errno
== EILSEQ
)
451 throw Exceptions::InternalError( "Failed to initialize UCS-4 code point from UTF-8 data." );
453 else if( errno
== EINVAL
)
455 throw Exceptions::MiscellaneousRequirement( "Malformed UTF-8 value in initialization of UCS-4 code point." );
457 else if( errno
== E2BIG
)
459 throw Exceptions::InternalError( "The UTF-8 source buffer contained more than one character when initializing a single UCS-4 code point." );
463 std::ostringstream msg
;
464 msg
<< "iconv failed with an unrecognized error code: " << errno
;
465 throw Exceptions::InternalError( strrefdup( msg
) );
468 else if( outbytesleft
!= 0 )
470 throw Exceptions::InternalError( "Failed to initialize the entire UCS-4 code point when converting from UTF-8." );
// Reads one UTF-8 encoded character starting at src and stores it in value_,
// using the converter from Helpers::requireUTF8ToUCS4Converter.
//   src  pointer to the first byte of the character; taken by value, so the
//        caller's pointer is not changed by this overload.
// The byte length is chosen from the high nibble of the first byte: 0xE0
// gives 3, 0xF0 gives 4, anything else reaching the switch gives 2. No
// available-length check is visible in this overload.
// Throws Exceptions::InternalError on EILSEQ, on E2BIG, on any other errno
// value, and when iconv succeeds without filling all of value_. Throws
// Exceptions::MiscellaneousRequirement on EINVAL.
// NOTE(review): the return-type line, the brace-only lines and listing lines
// 483-488 (between the declaration of tmp_src_avail and the switch) are
// missing; other lead bytes may be handled there -- confirm.
475 Kernel::UnicodeCodePoint::decode_UTF8( const char * src
)
477 static iconv_t converter
= Helpers::requireUTF8ToUCS4Converter( );
// iconv writes its output straight into the storage of value_.
479 char * dst
= reinterpret_cast< char * >( & value_
);
480 size_t outbytesleft
= sizeof( value_
);
482 size_t tmp_src_avail
;
489 switch( 0xF0 & *src
)
491 case 0xE0: tmp_src_avail
= 3; break;
492 case 0xF0: tmp_src_avail
= 4; break;
493 default: tmp_src_avail
= 2; break;
497 size_t count
= iconv( converter
,
498 ICONV_CAST( & src
), & tmp_src_avail
,
499 & dst
, & outbytesleft
);
// (size_t)(-1) is the failure value tested for; errno selects the message.
500 if( count
== (size_t)(-1) )
502 if( errno
== EILSEQ
)
504 throw Exceptions::InternalError( "Failed to initialize UCS-4 code point from UTF-8 data." );
506 else if( errno
== EINVAL
)
508 throw Exceptions::MiscellaneousRequirement( "Malformed UTF-8 value in initialization of UCS-4 code point." );
510 else if( errno
== E2BIG
)
512 throw Exceptions::InternalError( "The UTF-8 source buffer contained more than one character when initializing a single UCS-4 code point." );
516 std::ostringstream msg
;
517 msg
<< "iconv failed with an unrecognized error code: " << errno
;
518 throw Exceptions::InternalError( strrefdup( msg
) );
521 else if( outbytesleft
!= 0 )
523 throw Exceptions::InternalError( "Failed to initialize the entire UCS-4 code point when converting from UTF-8." );
// Loads value_ by copying four bytes from the position *src refers to.
//   src        address of the read pointer.
//   src_avail  byte count parameter; no use of it is visible in this listing.
// Throws Exceptions::InternalError with a "needs four bytes" message.
// NOTE(review): the return-type line, the brace-only lines, the condition
// guarding the throw and anything after the memcpy are missing here.
// Presumably the throw fires when *src_avail is below four and the pointer
// and count are advanced afterwards -- confirm against the file.
528 Kernel::UnicodeCodePoint::decode_UCS4( const char ** src
, size_t * src_avail
)
532 throw Exceptions::InternalError( "Not enough data available when initializing UCS-4 code point (needs four bytes)." );
// The four bytes are copied as they are; no byte-order handling is visible.
534 memcpy( reinterpret_cast< char * >( & value_
), *src
, 4 );
// Loads value_ by copying four bytes from src.
//   src  pointer to the bytes to copy; no length check is visible, so the
//        caller has to supply at least four readable bytes.
// The bytes are copied as they are; no byte-order handling is visible.
// NOTE(review): the return-type line and the brace-only lines are missing
// from this listing.
540 Kernel::UnicodeCodePoint::decode_UCS4( const char * src
)
542 memcpy( reinterpret_cast< char * >( & value_
), src
, 4 );
// Encodes this code point (value_) with the converter obtained from
// Helpers::requireUCS4ToUTF8Converter; the bytes of value_ are the iconv
// input.
//   dst        address of the caller's write pointer.
//   dst_avail  address of the caller's remaining byte count.
// Throws Exceptions::InternalError on EILSEQ, EINVAL, E2BIG, any other errno
// value, and when iconv succeeds but leaves input bytes unconsumed.
// NOTE(review): the return-type line, the brace-only lines and the line that
// passes the output arguments to iconv (listing line 555) are missing here.
// Presumably dst and dst_avail are forwarded there, so iconv advances the
// caller's pointer and reduces the count -- confirm against the file.
546 Kernel::UnicodeCodePoint::encode_UTF8( char ** dst
, size_t * dst_avail
) const
548 static iconv_t converter
= Helpers::requireUCS4ToUTF8Converter( );
550 const char * src
= reinterpret_cast< const char * >( & value_
);
551 size_t inbytesleft
= sizeof( value_
);
553 size_t count
= iconv( converter
,
554 ICONV_CAST( & src
), & inbytesleft
,
// (size_t)(-1) is the failure value tested for; errno selects the message.
556 if( count
== (size_t)(-1) )
558 if( errno
== EILSEQ
)
560 throw Exceptions::InternalError( "Failed to convert UCS-4 code point to UTF-8." );
562 else if( errno
== EINVAL
)
564 throw Exceptions::InternalError( "Malformed UCS-4 value (in conversion to UTF-8)." );
566 else if( errno
== E2BIG
)
568 throw Exceptions::InternalError( "The UTF-8 destination buffer was too small when encoding a single UCS-4 code point." );
572 std::ostringstream msg
;
573 msg
<< "iconv failed with an unrecognized error code: " << errno
;
574 throw Exceptions::InternalError( strrefdup( msg
) );
577 else if( inbytesleft
!= 0 )
579 throw Exceptions::InternalError( "Failed to use the entire UCS-4 code point when converting to UTF-8." );
// Encodes this code point (value_) with the converter obtained from
// Helpers::requireUCS4ToUTF16BEConverter; the bytes of value_ are the iconv
// input.
//   dst        address of the caller's write pointer.
//   dst_avail  address of the caller's remaining byte count.
// Throws Exceptions::InternalError on EILSEQ, EINVAL, E2BIG, any other errno
// value, and when iconv succeeds but leaves input bytes unconsumed.
// NOTE(review): the return-type line, the brace-only lines and the line that
// passes the output arguments to iconv (listing line 593) are missing here.
// Presumably dst and dst_avail are forwarded there, so iconv advances the
// caller's pointer and reduces the count -- confirm against the file.
584 Kernel::UnicodeCodePoint::encode_UTF16BE( char ** dst
, size_t * dst_avail
) const
586 static iconv_t converter
= Helpers::requireUCS4ToUTF16BEConverter( );
588 const char * src
= reinterpret_cast< const char * >( & value_
);
589 size_t inbytesleft
= sizeof( value_
);
591 size_t count
= iconv( converter
,
592 ICONV_CAST( & src
), & inbytesleft
,
// (size_t)(-1) is the failure value tested for; errno selects the message.
594 if( count
== (size_t)(-1) )
596 if( errno
== EILSEQ
)
598 throw Exceptions::InternalError( "Failed to convert UCS-4 code point to UTF-16-BE." );
600 else if( errno
== EINVAL
)
602 throw Exceptions::InternalError( "Malformed UCS-4 value (in conversion to UTF-16-BE)." );
604 else if( errno
== E2BIG
)
606 throw Exceptions::InternalError( "The UTF-16-BE destination buffer was too small when encoding a single UCS-4 code point." );
610 std::ostringstream msg
;
611 msg
<< "iconv failed with an unrecognized error code: " << errno
;
612 throw Exceptions::InternalError( strrefdup( msg
) );
615 else if( inbytesleft
!= 0 )
617 throw Exceptions::InternalError( "Failed to use the entire UCS-4 code point when converting to UTF-16-BE." );
// Sets value_ from a glyph name by looking it up in the glyph list obtained
// from Helpers::requireGlyphList.
//   name  glyph name to look up; passed to GlyphList::name_to_UCS4 together
//         with the address of value_.
// Throws Exceptions::InternalError, quoting the name, when name_to_UCS4
// reports failure by returning a value that tests false.
// NOTE(review): the return-type line and the brace-only lines are missing
// from this listing.
622 Kernel::UnicodeCodePoint::decode_glyph_name( const char * name
)
// The reference is bound once, on the first call, and reused afterwards.
624 static const FontMetrics::GlyphList
& glyphList
= Helpers::requireGlyphList( );
625 if( ! glyphList
.name_to_UCS4( name
, & value_
) )
627 std::ostringstream msg
;
628 msg
<< "The glyph name \"" << name
<< "\" is not in the glyph list, and cannot be converted to a UCS-4 code point." ;
629 throw Exceptions::InternalError( strrefdup( msg
) );
// Definitions of two static UnicodeCodePoint members, constructed from the
// integers 32 (the ASCII space character) and 10 (the ASCII line feed).
633 Kernel::UnicodeCodePoint
Kernel::UnicodeCodePoint::SPACE( 32 );
634 Kernel::UnicodeCodePoint
Kernel::UnicodeCodePoint::NEWLINE( 10 );