1 /* This file is part of Shapes.
3 * Shapes is free software: you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation, either version 3 of the License, or
8 * Shapes is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with Shapes. If not, see <http://www.gnu.org/licenses/>.
16 * Copyright 2008, 2010 Henrik Tidefelt
19 #include "strrefdup.h"
20 #include "charconverters.h"
21 #include "shapesexceptions.h"
22 #include "glyphlist.h"
23 #include "characterencoding.h"
24 #include "texttypes.h"
28 #include <errno.h>// How come iconv is not enough?
35 using namespace Shapes
;
// Opens an iconv conversion descriptor on demand.
//
//   converter      Location of the descriptor; the value (iconv_t)( -1 ) is
//                  taken to mean "not opened".
//   to_encoding    Name of the target encoding, handed to iconv_open.
//   from_encoding  Name of the source encoding, handed to iconv_open.
//
// Throws Exceptions::ExternalError, naming both encodings, when iconv_open
// itself returns (iconv_t)( -1 ).
38 iconv_maybe_open( iconv_t
* converter
, const char * to_encoding
, const char * from_encoding
)
// Only a descriptor that still holds the "not opened" sentinel is opened here.
40 if( *converter
== (iconv_t
)( -1 ) )
42 *converter
= iconv_open( to_encoding
, from_encoding
);
// iconv_open reports failure by returning the very same sentinel value.
43 if( *converter
== (iconv_t
)( -1 ) )
45 std::ostringstream msg
;
46 msg
<< "iconv_open failed to create converter from " << from_encoding
<< " to " << to_encoding
<< "." ;
47 throw Exceptions::ExternalError( strrefdup( msg
) );
// Accessor for the iconv descriptor used for UTF-8 to MacRoman conversion
// (direction as stated by the function name; MAC_ROMAN is passed in the
// to_encoding position of iconv_maybe_open).
// The descriptor is kept in a function-local static that starts out as
// (iconv_t)( -1 ).
// NOTE(review): the test on `cleanup` is not visible in this view; presumably
// it selects the close-and-reset branch below -- confirm.
53 Helpers::requireUTF8ToMacRomanConverter( bool cleanup
)
55 static iconv_t converter
= (iconv_t
)( - 1 );
// An open descriptor is closed and the "not opened" sentinel is restored.
58 if( converter
!= (iconv_t
)( -1 ) )
60 iconv_close( converter
);
61 converter
= (iconv_t
)( -1 );
// Opens the descriptor unless it is already open.
66 iconv_maybe_open( & converter
,
67 MAC_ROMAN
, // This is meant to be what is called MacRoman in PDF.
// Accessor for the iconv descriptor used for MacRoman to UTF-8 conversion
// (direction as stated by the function name; MAC_ROMAN is the last argument,
// i.e. the from_encoding of iconv_maybe_open).
// The descriptor is kept in a function-local static that starts out as
// (iconv_t)( -1 ).
// NOTE(review): the test on `cleanup` is not visible in this view; presumably
// it selects the close-and-reset branch below -- confirm.
74 Helpers::requireMacRomanToUTF8Converter( bool cleanup
)
76 static iconv_t converter
= (iconv_t
)( - 1 );
// An open descriptor is closed and the "not opened" sentinel is restored.
79 if( converter
!= (iconv_t
)( -1 ) )
81 iconv_close( converter
);
82 converter
= (iconv_t
)( -1 );
// Opens the descriptor unless it is already open.
87 iconv_maybe_open( & converter
,
89 MAC_ROMAN
); // This is meant to be what is called MacRoman in PDF.
// Accessor for the iconv descriptor used for UTF-8 to ASCII conversion
// (direction as stated by the function name; "ASCII" is passed in the
// to_encoding position of iconv_maybe_open).
// The descriptor is kept in a function-local static that starts out as
// (iconv_t)( -1 ).
// NOTE(review): the test on `cleanup` is not visible in this view; presumably
// it selects the close-and-reset branch below -- confirm.
95 Helpers::requireUTF8ToASCIIConverter( bool cleanup
)
97 static iconv_t converter
= (iconv_t
)( - 1 );
// An open descriptor is closed and the "not opened" sentinel is restored.
100 if( converter
!= (iconv_t
)( -1 ) )
102 iconv_close( converter
);
103 converter
= (iconv_t
)( -1 );
// Opens the descriptor unless it is already open.
108 iconv_maybe_open( & converter
,
109 "ASCII", // This is used for the names of glyphs in a font
// Accessor for the iconv descriptor used for UTF-8 to UCS-4 conversion
// (direction as stated by the function name; the encoding-name arguments are
// not visible in this view).
// The descriptor is kept in a function-local static that starts out as
// (iconv_t)( -1 ).
// NOTE(review): the test on `cleanup` is not visible in this view; presumably
// it selects the close-and-reset branch below -- confirm.
116 Helpers::requireUTF8ToUCS4Converter( bool cleanup
)
118 static iconv_t converter
= (iconv_t
)( - 1 );
// An open descriptor is closed and the "not opened" sentinel is restored.
121 if( converter
!= (iconv_t
)( -1 ) )
123 iconv_close( converter
);
124 converter
= (iconv_t
)( -1 );
// Opens the descriptor unless it is already open.
129 iconv_maybe_open( & converter
,
// Accessor for the iconv descriptor used for UCS-4 to UTF-8 conversion
// (direction as stated by the function name; the encoding-name arguments are
// not visible in this view).
// The descriptor is kept in a function-local static that starts out as
// (iconv_t)( -1 ).
// NOTE(review): the test on `cleanup` is not visible in this view; presumably
// it selects the close-and-reset branch below -- confirm.
137 Helpers::requireUCS4ToUTF8Converter( bool cleanup
)
139 static iconv_t converter
= (iconv_t
)( - 1 );
// An open descriptor is closed and the "not opened" sentinel is restored.
142 if( converter
!= (iconv_t
)( -1 ) )
144 iconv_close( converter
);
145 converter
= (iconv_t
)( -1 );
// Opens the descriptor unless it is already open.
150 iconv_maybe_open( & converter
,
// Accessor for the iconv descriptor used for UCS-4 to MacRoman conversion
// (direction as stated by the function name; the encoding-name arguments are
// not visible in this view).
// The descriptor is kept in a function-local static that starts out as
// (iconv_t)( -1 ).
// NOTE(review): the test on `cleanup` is not visible in this view; presumably
// it selects the close-and-reset branch below -- confirm.
158 Helpers::requireUCS4ToMacRomanConverter( bool cleanup
)
160 static iconv_t converter
= (iconv_t
)( - 1 );
// An open descriptor is closed and the "not opened" sentinel is restored.
163 if( converter
!= (iconv_t
)( -1 ) )
165 iconv_close( converter
);
166 converter
= (iconv_t
)( -1 );
// Opens the descriptor unless it is already open.
171 iconv_maybe_open( & converter
,
// Accessor for the iconv descriptor used for UTF-16BE to UCS-4 conversion
// (direction as stated by the function name; the encoding-name arguments are
// not visible in this view).
// The descriptor is kept in a function-local static that starts out as
// (iconv_t)( -1 ).
// NOTE(review): the test on `cleanup` is not visible in this view; presumably
// it selects the close-and-reset branch below -- confirm.
179 Helpers::requireUTF16BEToUCS4Converter( bool cleanup
)
181 static iconv_t converter
= (iconv_t
)( - 1 );
// An open descriptor is closed and the "not opened" sentinel is restored.
184 if( converter
!= (iconv_t
)( -1 ) )
186 iconv_close( converter
);
187 converter
= (iconv_t
)( -1 );
// Opens the descriptor unless it is already open.
192 iconv_maybe_open( & converter
,
// Accessor for the iconv descriptor used for UCS-4 to UTF-16BE conversion
// (direction as stated by the function name; the encoding-name arguments are
// not visible in this view).
// The descriptor is kept in a function-local static that starts out as
// (iconv_t)( -1 ).
// NOTE(review): the test on `cleanup` is not visible in this view; presumably
// it selects the close-and-reset branch below -- confirm.
200 Helpers::requireUCS4ToUTF16BEConverter( bool cleanup
)
202 static iconv_t converter
= (iconv_t
)( - 1 );
// An open descriptor is closed and the "not opened" sentinel is restored.
205 if( converter
!= (iconv_t
)( -1 ) )
207 iconv_close( converter
);
208 converter
= (iconv_t
)( -1 );
// Opens the descriptor unless it is already open.
213 iconv_maybe_open( & converter
,
// Accessor for the iconv descriptor used for UTF-8 to WinANSI conversion
// (direction as stated by the function name; "LATIN1" is passed in the
// to_encoding position of iconv_maybe_open as the stand-in for WinANSI).
// The descriptor is kept in a function-local static that starts out as
// (iconv_t)( -1 ).
// NOTE(review): the test on `cleanup` is not visible in this view; presumably
// it selects the close-and-reset branch below -- confirm.
221 Helpers::requireUTF8ToWinANSIConverter( bool cleanup
)
223 static iconv_t converter
= (iconv_t
)( - 1 );
// An open descriptor is closed and the "not opened" sentinel is restored.
226 if( converter
!= (iconv_t
)( -1 ) )
228 iconv_close( converter
);
229 converter
= (iconv_t
)( -1 );
// Opens the descriptor unless it is already open.
234 iconv_maybe_open( & converter
,
235 "LATIN1", // This is meant to be what is called WinANSI in PDF.
// Accessor for the iconv descriptor used for UTF-8 to UTF-16BE conversion
// (direction as stated by the function name; the encoding-name arguments are
// not visible in this view).
// The descriptor is kept in a function-local static that starts out as
// (iconv_t)( -1 ).
// NOTE(review): the test on `cleanup` is not visible in this view; presumably
// it selects the close-and-reset branch below -- confirm.
242 Helpers::requireUTF8ToUTF16BEConverter( bool cleanup
)
244 static iconv_t converter
= (iconv_t
)( - 1 );
// An open descriptor is closed and the "not opened" sentinel is restored.
247 if( converter
!= (iconv_t
)( -1 ) )
249 iconv_close( converter
);
250 converter
= (iconv_t
)( -1 );
// Opens the descriptor unless it is already open.
255 iconv_maybe_open( & converter
,
// Accessor for the glyph list, returned as a const reference.
// The list is held through a function-local static pointer that starts out
// null and is filled in by parsing the file located with
// Lang::Font::searchGlyphList.
//
// Throws Exceptions::ExternalError when the file cannot be opened, or when
// parsing throws a C string or a std::string.  Throws
// Exceptions::InternalError with a fixed message for an unrecognized
// exception.
// NOTE(review): the handling of `cleanup`, the guard that avoids re-parsing,
// the try block and the return statement are not visible in this view --
// confirm against the complete file.
262 const FontMetrics::GlyphList
&
263 Helpers::requireGlyphList( bool cleanup
)
265 static const FontMetrics::GlyphList
* converter
= 0;
// Locate the glyph list file and open it for reading.
278 std::string filename
= Lang::Font::searchGlyphList( );
279 std::ifstream
iFile( filename
.c_str( ) );
280 if( ! iFile
.is_open( ) )
282 std::ostringstream oss
;
283 oss
<< "Could locate, but not open the glyph list " << filename
;
284 throw Exceptions::ExternalError( strrefdup( oss
) );
// Parse the stream; the object is allocated with new and kept in the static.
288 converter
= new FontMetrics::GlyphList( iFile
);
// Parser errors thrown as plain C strings are reported as external errors.
290 catch( const char * ball
)
292 std::ostringstream oss
;
293 oss
<< "Parsing the glyph list " << filename
<< " resulted in the error: " << ball
;
294 throw Exceptions::ExternalError( strrefdup( oss
) );
// Same treatment for std::string (caught by value, i.e. copied).
296 catch( const std::string ball
)
298 std::ostringstream oss
;
299 oss
<< "Parsing the glyph list " << filename
<< " resulted in the error: " << ball
;
300 throw Exceptions::ExternalError( strrefdup( oss
) );
// Shapes exceptions are announced on std::cerr; per the message they are
// then rethrown (the rethrow statement itself is not visible in this view).
302 catch( const Shapes::Exceptions::Exception
& ball
)
304 std::cerr
<< "Parsing the glyph list " << filename
<< " resulted in an error. Rethrowing." << std::endl
;
// Fallback for anything else (presumably inside a catch-all handler).
309 throw Exceptions::InternalError( "An unrecognized exception was caught from glyph list parsing." );
// Accessor for the MacRoman character encoding table, returned as a const
// reference.
// The table is held through a function-local static pointer that starts out
// null and is filled in by parsing the file located with
// Lang::Font::searchCharacterEncoding( "MacRoman" ).
//
// Throws Exceptions::ExternalError when the file cannot be opened, or when
// parsing throws a C string or a std::string.  Throws
// Exceptions::InternalError with a fixed message for an unrecognized
// exception.
// NOTE(review): the handling of `cleanup`, the guard that avoids re-parsing,
// the try block and the return statement are not visible in this view --
// confirm against the complete file.
316 const FontMetrics::CharacterEncoding
&
317 Helpers::requireMacRomanEncoding( bool cleanup
)
319 static const FontMetrics::CharacterEncoding
* converter
= 0;
// Locate the encoding file and open it for reading.
332 std::string filename
= Lang::Font::searchCharacterEncoding( "MacRoman" );
333 std::ifstream
iFile( filename
.c_str( ) );
334 if( ! iFile
.is_open( ) )
336 std::ostringstream oss
;
337 oss
<< "Could locate, but not open the character encoding " << filename
;
338 throw Exceptions::ExternalError( strrefdup( oss
) );
// Parse the stream; the object is allocated with new and kept in the static.
342 converter
= new FontMetrics::CharacterEncoding( iFile
);
// Parser errors thrown as plain C strings are reported as external errors.
344 catch( const char * ball
)
346 std::ostringstream oss
;
347 oss
<< "Parsing the character encoding " << filename
<< " resulted in the error: " << ball
;
348 throw Exceptions::ExternalError( strrefdup( oss
) );
// Same treatment for std::string (caught by value, i.e. copied).
350 catch( const std::string ball
)
352 std::ostringstream oss
;
353 oss
<< "Parsing the character encoding " << filename
<< " resulted in the error: " << ball
;
354 throw Exceptions::ExternalError( strrefdup( oss
) );
// Shapes exceptions are announced on std::cerr; per the message they are
// then rethrown (the rethrow statement itself is not visible in this view).
356 catch( const Shapes::Exceptions::Exception
& ball
)
358 std::cerr
<< "Parsing the character encoding " << filename
<< " resulted in an error. Rethrowing." << std::endl
;
// Fallback for anything else (presumably inside a catch-all handler).
363 throw Exceptions::InternalError( "An unrecognized exception was caught from character encoding parsing." );
// Converts this code point (value_) to its single-byte MacRoman
// representation using iconv, and returns that byte read as unsigned char.
//
// Throws Exceptions::MiscellaneousRequirement when iconv reports EILSEQ,
// i.e. the code point has no MacRoman representation, and
// Exceptions::InternalError for every other failure, including input that
// was not consumed completely.
// NOTE(review): the descriptor is cached in a function-local static, so it is
// fetched from the helper only on the first call; if the helper's cleanup
// path closes it later, this copy is not refreshed -- confirm that cannot
// happen while this function is still in use.
371 Kernel::UnicodeCodePoint::get_MacRoman( ) const
373 static iconv_t converter
= Helpers::requireUCS4ToMacRomanConverter( );
// The result is exactly one byte.
375 const size_t BUF_SIZE
= 1;
376 char buf
[ BUF_SIZE
];
378 size_t outbytesleft
= BUF_SIZE
;
// The source is the raw bytes of value_.
380 const char * src
= reinterpret_cast< const char * >( & value_
);
381 size_t inbytesleft
= sizeof( value_
);
// NOTE(review): the declaration of `dst` is not visible in this view;
// presumably it points at buf -- confirm.
383 size_t count
= iconv( converter
,
384 ICONV_CAST( & src
), & inbytesleft
,
385 & dst
, & outbytesleft
);
// iconv returns (size_t)(-1) on failure and sets errno.
386 if( count
== (size_t)(-1) )
388 if( errno
== EILSEQ
)
// NOTE(review): the message below spells "encodig"; left untouched here
// because it is runtime text.
390 std::ostringstream msg
;
391 msg
<< "The UCS-4 code point U+" << std::hex
<< value_
<< " cannot be represented in MacRoman encodig." ;
392 throw Exceptions::MiscellaneousRequirement( strrefdup( msg
) );
394 else if( errno
== EINVAL
)
396 throw Exceptions::InternalError( "Malformed UCS-4 value (in conversion to MacRoman)." );
398 else if( errno
== E2BIG
)
400 throw Exceptions::InternalError( "The MacRoman destination buffer was too small when encoding a single UCS-4 code point." );
// Any other errno value is reported verbatim.
404 std::ostringstream msg
;
405 msg
<< "iconv failed with an unrecognized error code: " << errno
;
406 throw Exceptions::InternalError( strrefdup( msg
) );
// Success, but part of the source was left unconverted.
409 else if( inbytesleft
!= 0 )
411 throw Exceptions::InternalError( "Failed to use the entire UCS-4 code point when converting to MacRoman." );
413 return *reinterpret_cast< unsigned char * >( buf
);
// Decodes one UTF-8 encoded character into value_ using iconv.
//
//   src        In/out pointer to the UTF-8 data; iconv is handed this
//              pointer, so by the iconv contract *src is advanced past the
//              bytes it consumes.
//   src_avail  In/out count of bytes available at *src; reduced by the
//              length of the decoded character.
//
// Throws Exceptions::InternalError when fewer bytes are available than the
// lead byte calls for, on EILSEQ, on E2BIG, on unknown errno values and when
// value_ was not filled completely; throws
// Exceptions::MiscellaneousRequirement on EINVAL.
// NOTE(review): the descriptor is cached in a function-local static and is
// therefore fetched from the helper only on the first call.
417 Kernel::UnicodeCodePoint::decode_UTF8( const char ** src
, size_t * src_avail
)
419 static iconv_t converter
= Helpers::requireUTF8ToUCS4Converter( );
// The destination is the raw storage of value_.
421 char * dst
= reinterpret_cast< char * >( & value_
);
422 size_t outbytesleft
= sizeof( value_
);
// Length in bytes of the character to decode, derived from the high nibble
// of the lead byte: 0xE0 -> 3, 0xF0 -> 4, anything else -> 2.
// NOTE(review): the lines directly before this switch are not visible in
// this view; single-byte (ASCII) input is presumably handled there -- confirm.
424 size_t tmp_src_avail
;
431 switch( 0xF0 & **src
)
433 case 0xE0: tmp_src_avail
= 3; break;
434 case 0xF0: tmp_src_avail
= 4; break;
435 default: tmp_src_avail
= 2; break;
// Refuse to read past what the caller says is available.
438 if( tmp_src_avail
> *src_avail
)
440 throw Exceptions::InternalError( "The UTF-8 source did not contain a complete character when initializing a single UCS-4 code point." );
// The caller's count is charged up front, before the conversion is attempted.
442 *src_avail
-= tmp_src_avail
;
444 size_t count
= iconv( converter
,
445 ICONV_CAST( src
), & tmp_src_avail
,
446 & dst
, & outbytesleft
);
// iconv returns (size_t)(-1) on failure and sets errno.
447 if( count
== (size_t)(-1) )
449 if( errno
== EILSEQ
)
451 throw Exceptions::InternalError( "Failed to initialize UCS-4 code point from UTF-8 data." );
453 else if( errno
== EINVAL
)
455 throw Exceptions::MiscellaneousRequirement( "Malformed UTF-8 value in initialization of UCS-4 code point." );
// The output buffer holds one code point, so running out of room is reported
// as "more than one character" in the source.
457 else if( errno
== E2BIG
)
459 throw Exceptions::InternalError( "The UTF-8 source buffer contained more than one character when initializing a single UCS-4 code point." );
// Any other errno value is reported verbatim.
463 std::ostringstream msg
;
464 msg
<< "iconv failed with an unrecognized error code: " << errno
;
465 throw Exceptions::InternalError( strrefdup( msg
) );
// Success, but value_ was only partly written.
468 else if( outbytesleft
!= 0 )
470 throw Exceptions::InternalError( "Failed to initialize the entire UCS-4 code point when converting from UTF-8." );
// Decodes one UTF-8 encoded character, starting at src, into value_ using
// iconv.  This overload takes the pointer by value and has no available-byte
// count, so no bounds check is visible here and the caller's pointer is not
// advanced.
//
// Throws Exceptions::InternalError on EILSEQ, on E2BIG, on unknown errno
// values and when value_ was not filled completely; throws
// Exceptions::MiscellaneousRequirement on EINVAL.
// NOTE(review): the descriptor is cached in a function-local static and is
// therefore fetched from the helper only on the first call.
475 Kernel::UnicodeCodePoint::decode_UTF8( const char * src
)
477 static iconv_t converter
= Helpers::requireUTF8ToUCS4Converter( );
// The destination is the raw storage of value_.
479 char * dst
= reinterpret_cast< char * >( & value_
);
480 size_t outbytesleft
= sizeof( value_
);
// Length in bytes of the character to decode, derived from the high nibble
// of the lead byte: 0xE0 -> 3, 0xF0 -> 4, anything else -> 2.
// NOTE(review): the lines directly before this switch are not visible in
// this view; single-byte (ASCII) input is presumably handled there -- confirm.
482 size_t tmp_src_avail
;
489 switch( 0xF0 & *src
)
491 case 0xE0: tmp_src_avail
= 3; break;
492 case 0xF0: tmp_src_avail
= 4; break;
493 default: tmp_src_avail
= 2; break;
497 size_t count
= iconv( converter
,
498 ICONV_CAST( & src
), & tmp_src_avail
,
499 & dst
, & outbytesleft
);
// iconv returns (size_t)(-1) on failure and sets errno.
500 if( count
== (size_t)(-1) )
502 if( errno
== EILSEQ
)
504 throw Exceptions::InternalError( "Failed to initialize UCS-4 code point from UTF-8 data." );
506 else if( errno
== EINVAL
)
508 throw Exceptions::MiscellaneousRequirement( "Malformed UTF-8 value in initialization of UCS-4 code point." );
// The output buffer holds one code point, so running out of room is reported
// as "more than one character" in the source.
510 else if( errno
== E2BIG
)
512 throw Exceptions::InternalError( "The UTF-8 source buffer contained more than one character when initializing a single UCS-4 code point." );
// Any other errno value is reported verbatim.
516 std::ostringstream msg
;
517 msg
<< "iconv failed with an unrecognized error code: " << errno
;
518 throw Exceptions::InternalError( strrefdup( msg
) );
// Success, but value_ was only partly written.
521 else if( outbytesleft
!= 0 )
523 throw Exceptions::InternalError( "Failed to initialize the entire UCS-4 code point when converting from UTF-8." );
// Initializes value_ by copying four raw bytes from *src.
//
// Throws Exceptions::InternalError when not enough data is available.
// NOTE(review): the condition guarding the throw, and any update of *src and
// *src_avail after the copy, are not visible in this view -- confirm.
528 Kernel::UnicodeCodePoint::decode_UCS4( const char ** src
, size_t * src_avail
)
532 throw Exceptions::InternalError( "Not enough data available when initializing UCS-4 code point (needs four bytes)." );
// Plain byte copy: no byte-order conversion is performed here.
534 memcpy( reinterpret_cast< char * >( & value_
), *src
, 4 );
// Initializes value_ by copying four raw bytes from src.  The copy is a plain
// memcpy, so no byte-order conversion is performed, and four readable bytes
// at src are required of the caller.
540 Kernel::UnicodeCodePoint::decode_UCS4( const char * src
)
542 memcpy( reinterpret_cast< char * >( & value_
), src
, 4 );
// Encodes this code point (value_) as UTF-8 using iconv.
//
//   dst        Destination pointer, passed by address.
//   dst_avail  Space available at the destination, passed by address.
//
// Throws Exceptions::InternalError on every iconv failure and when the
// source code point was not consumed completely.
// NOTE(review): the output arguments of the iconv call are not visible in
// this view; presumably dst and dst_avail are passed straight through, so
// that iconv advances *dst and reduces *dst_avail -- confirm.
// NOTE(review): the descriptor is cached in a function-local static and is
// therefore fetched from the helper only on the first call.
546 Kernel::UnicodeCodePoint::encode_UTF8( char ** dst
, size_t * dst_avail
) const
548 static iconv_t converter
= Helpers::requireUCS4ToUTF8Converter( );
// The source is the raw bytes of value_.
550 const char * src
= reinterpret_cast< const char * >( & value_
);
551 size_t inbytesleft
= sizeof( value_
);
553 size_t count
= iconv( converter
,
554 ICONV_CAST( & src
), & inbytesleft
,
// iconv returns (size_t)(-1) on failure and sets errno.
556 if( count
== (size_t)(-1) )
558 if( errno
== EILSEQ
)
560 throw Exceptions::InternalError( "Failed to convert UCS-4 code point to UTF-8." );
562 else if( errno
== EINVAL
)
564 throw Exceptions::InternalError( "Malformed UCS-4 value (in conversion to UTF-8)." );
566 else if( errno
== E2BIG
)
568 throw Exceptions::InternalError( "The UTF-8 destination buffer was too small when encoding a single UCS-4 code point." );
// Any other errno value is reported verbatim.
572 std::ostringstream msg
;
573 msg
<< "iconv failed with an unrecognized error code: " << errno
;
574 throw Exceptions::InternalError( strrefdup( msg
) );
// Success, but part of the source was left unconverted.
577 else if( inbytesleft
!= 0 )
579 throw Exceptions::InternalError( "Failed to use the entire UCS-4 code point when converting to UTF-8." );
// Encodes this code point (value_) as UTF-16BE using iconv.
//
//   dst        Destination pointer, passed by address.
//   dst_avail  Space available at the destination, passed by address.
//
// Throws Exceptions::InternalError on every iconv failure and when the
// source code point was not consumed completely.
// NOTE(review): the output arguments of the iconv call are not visible in
// this view; presumably dst and dst_avail are passed straight through, so
// that iconv advances *dst and reduces *dst_avail -- confirm.
// NOTE(review): the descriptor is cached in a function-local static and is
// therefore fetched from the helper only on the first call.
584 Kernel::UnicodeCodePoint::encode_UTF16BE( char ** dst
, size_t * dst_avail
) const
586 static iconv_t converter
= Helpers::requireUCS4ToUTF16BEConverter( );
// The source is the raw bytes of value_.
588 const char * src
= reinterpret_cast< const char * >( & value_
);
589 size_t inbytesleft
= sizeof( value_
);
591 size_t count
= iconv( converter
,
592 ICONV_CAST( & src
), & inbytesleft
,
// iconv returns (size_t)(-1) on failure and sets errno.
594 if( count
== (size_t)(-1) )
596 if( errno
== EILSEQ
)
598 throw Exceptions::InternalError( "Failed to convert UCS-4 code point to UTF-16-BE." );
600 else if( errno
== EINVAL
)
602 throw Exceptions::InternalError( "Malformed UCS-4 value (in conversion to UTF-16-BE)." );
604 else if( errno
== E2BIG
)
606 throw Exceptions::InternalError( "The UTF-16-BE destination buffer was too small when encoding a single UCS-4 code point." );
// Any other errno value is reported verbatim.
610 std::ostringstream msg
;
611 msg
<< "iconv failed with an unrecognized error code: " << errno
;
612 throw Exceptions::InternalError( strrefdup( msg
) );
// Success, but part of the source was left unconverted.
615 else if( inbytesleft
!= 0 )
617 throw Exceptions::InternalError( "Failed to use the entire UCS-4 code point when converting to UTF-16-BE." );
// Sets value_ to the code point that the glyph list associates with the
// glyph name `name`.
//
// Throws Exceptions::InternalError, quoting the name, when name_to_UCS4
// reports that the lookup failed.
622 Kernel::UnicodeCodePoint::decode_glyph_name( const char * name
)
// The reference is bound once, on the first call; Helpers::requireGlyphList
// is not consulted again afterwards.
624 static const FontMetrics::GlyphList
& glyphList
= Helpers::requireGlyphList( );
// name_to_UCS4 writes the result through the pointer; a false return is
// treated as failure.
625 if( ! glyphList
.name_to_UCS4( name
, & value_
) )
627 std::ostringstream msg
;
628 msg
<< "The glyph name \"" << name
<< "\" is not in the glyph list, and cannot be converted to a UCS-4 code point." ;
629 throw Exceptions::InternalError( strrefdup( msg
) );
// Definitions of two UnicodeCodePoint constants, constructed from the values
// 32 (U+0020, space) and 10 (U+000A, line feed) respectively.
633 Kernel::UnicodeCodePoint
Kernel::UnicodeCodePoint::SPACE( 32 );
634 Kernel::UnicodeCodePoint
Kernel::UnicodeCodePoint::NEWLINE( 10 );