1 /* This file is part of Shapes.
3 * Shapes is free software: you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation, either version 3 of the License, or
8 * Shapes is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with Shapes. If not, see <http://www.gnu.org/licenses/>.
16 * Copyright 2008 Henrik Tidefelt
19 #include "glyphlist.h"
20 #include "charconverters.h"
21 #include "autoonoff.h"
22 #include "shapesexceptions.h"
23 #include "charconverters.h"
24 #include "utf8tools.h"
29 #include <errno.h> // How come iconv is not enough?
33 using namespace FontMetrics
;
35 // This function is rather special, since it assumes that numbers are written in groups
36 // of four characters, separated by exactly one space.
38 // If this fails since we're not given a singleton character, this is indicated by setting
39 // *end = src and returning 0.
//
// Overview of what the code below does:
//   - Hexadecimal digits are parsed with strtol( ..., 16 ) and stored byte by byte
//     in the scratch buffer bufUTF16BE (most significant byte first).
//   - The buffer is then converted with iconv, using the supplied descriptor, into
//     a FontMetrics::GlyphList::UnicodeType (res).
//
// Parameters:
//   src                     Text containing the hexadecimal number to parse.
//   end                     Out-parameter; it is handed on to strtol, which stores the
//                           position where parsing stopped.
//   converterUTF16BEToUCS4  iconv conversion descriptor used for the final conversion.
//
// Throws a string literal (const char *) when iconv fails or when the output
// buffer was not completely filled.
40 FontMetrics::GlyphList::UnicodeType
41 glyphlist_strtol( char * src
, char ** end
, iconv_t converterUTF16BEToUCS4
)
43 const size_t BUF_SIZE
= 16;
// Scratch buffer receiving the UTF-16BE bytes.
// NOTE(review): the buffer is static, so concurrent calls would presumably share it -- confirm
// that this function is only used from one thread.
44 static char bufUTF16BE
[ BUF_SIZE
];
48 // std::cerr << "src: " ;
49 // for( const char * s = src; *s != '\0' && *s != '\r'; ++s )
// Number of input bytes later handed to iconv; starts at zero.
54 size_t inbytesleft
= 0;
// Write cursor into bufUTF16BE, viewed as unsigned bytes.
58 unsigned char * dst
= reinterpret_cast< unsigned char * >( bufUTF16BE
);
61 // Read most significant byte
// Base 16: the digits are interpreted as hexadecimal.
66 *dst
= strtol( buf
, end
, 16 );
69 // Read least significant byte
74 *dst
= strtol( buf
, end
, 16 );
81 // end already points at the end.
88 // std::cerr << " (" << inbytesleft << " bytes: " ;
89 // for( size_t i = 0; i < inbytesleft; i += 2 )
91 // std::cerr << std::setiosflags(std::ios::hex) << *reinterpret_cast< uint16_t * >( bufUTF16BE + i )
92 // << std::resetiosflags(std::ios::hex) ;
// Set up the iconv input (the scratch buffer) and output (the bytes of res).
96 const char * inbuf
= bufUTF16BE
;
98 FontMetrics::GlyphList::UnicodeType res
;
99 char * outbuf
= reinterpret_cast< char * >( & res
);;
100 size_t outbytesleft
= sizeof( FontMetrics::GlyphList::UnicodeType
);
102 // The ICONV_CAST macro is defined in config.h.
103 size_t count
= iconv( converterUTF16BEToUCS4
,
104 ICONV_CAST( & inbuf
), & inbytesleft
,
105 & outbuf
, & outbytesleft
);
// iconv reports failure by returning (size_t)(-1); errno then identifies the cause.
// NOTE(review): the messages below read "do to"; presumably "due to" was intended. They are
// runtime strings, so they are left untouched here.
106 if( count
== (size_t)(-1) )
108 if( errno
== EILSEQ
)
110 throw "Conversion from UTF-16BE to UCS-4 failed do to illegal value.";
112 else if( errno
== EINVAL
)
114 throw "Conversion from UTF-16BE to UCS-4 failed do to incomplete value.";
116 else if( errno
== E2BIG
)
123 throw "Conversion from UTF-16BE to UCS-4 failed with an unrecognized error code.";
// outbytesleft started at sizeof( UnicodeType ); a non-zero remainder means that res was not
// completely filled by the conversion.
// NOTE(review): "produced to output" presumably should read "produced no output" -- confirm.
126 if( outbytesleft
!= 0 )
128 throw "Conversion from UTF-16BE to UCS-4 produced to output.";
130 // std::cerr << " --> "
131 // << std::setiosflags(std::ios::hex) << res
132 // << std::resetiosflags(std::ios::hex)
// Constructs a glyph list from the input stream iFile.
// The direct lookup table namePtrs_ is sized to TABLE_SIZE entries, each initialized to 0
// (meaning that no name has been registered for that code yet).
// NOTE(review): presumably the stream is then parsed by readfile -- confirm.
137 GlyphList::GlyphList( std::istream
& iFile
)
139 namePtrs_
.resize( TABLE_SIZE
, 0 );
// Destructor.
// NOTE(review): readfile stores strdup'ed names in nameMem_; confirm that they are released
// with free here.
143 GlyphList::~GlyphList( )
// Looks up the glyph name registered for the code point `code`.
//
// Parameters:
//   code  The code point to look up.
//   dst   Out-parameter that receives the pointer to the name.
//
// Codes below TABLE_SIZE are read directly from the namePtrs_ table; all other codes
// are searched for in namePtrsWide_.
147 GlyphList::UCS4_to_name( UnicodeType code
, const char ** dst
) const
// Small codes: direct table lookup.
149 if( code
< TABLE_SIZE
)
151 *dst
= namePtrs_
[ code
];
// Large codes: associative lookup. (typeof is a GNU extension.)
155 typedef typeof namePtrsWide_ MapType
;
156 MapType::const_iterator i
= namePtrsWide_
.find( code
);
// Reaching end( ) means that no entry with this code was found.
157 if( i
== namePtrsWide_
.end( ) )
// Looks up the glyph name for a single character given as a UTF-8 encoded,
// null-terminated string.
//
// Parameters:
//   code  The UTF-8 bytes of the character.
//   dst   Out-parameter forwarded to UCS4_to_name.
//
// The input is validated, converted with iconv into the scratch buffer buf, reinterpreted
// as a UnicodeType, and finally handed to UCS4_to_name, whose result is returned.
//
// Throws Shapes::Exceptions::MiscellaneousRequirement for invalid input, and
// Shapes::Exceptions::ExternalError or Shapes::Exceptions::InternalError when the
// conversion fails.
//
// NOTE(review): the converter requested below is requireUTF8ToUCS4Converter, while the
// comments and messages talk about UTF-16BE -- confirm which target encoding is intended.
167 GlyphList::UTF8_to_name( const char * code
, const char ** dst
) const
169 // I've read that a UTF-8 value may occupy 6 bytes. Hence assume we get no more than 6 bytes of input.
170 // Then forget that this should be just one character. Then this could be 6 single byte code points.
171 // Since it seems like a UTF-16BE value may occupy 8 bytes (at least there are numbers this big in glyphlist.txt),
172 // 6 * 8 = 48 bytes should be enough.
173 const size_t BUF_SIZE
= 64;
// NOTE(review): static scratch buffer; concurrent calls would presumably share it -- confirm.
174 static char buf
[ BUF_SIZE
];
178 throw Shapes::Exceptions::MiscellaneousRequirement( "When converting a single UTF-8 value to UTF-16BE: The source value is empty." );
// Reject inputs that could not fit in buf even in the worst case.
181 size_t inbytesMax
= strlen( code
);
182 if( 8 * inbytesMax
> BUF_SIZE
) // 8 for the size of a UTF-16BE code point.
184 throw Shapes::Exceptions::MiscellaneousRequirement( "When converting a single UTF-8 value to UTF-16BE: This many bytes definitely represents more than one UTF-8 character." );
188 // Make sure there is just one character in the string.
190 for( const char * src
= code
; *src
!= '\0'; ++src
)
192 if( Shapes::Helpers::utf8leadByte( *src
) )
199 throw Shapes::Exceptions::MiscellaneousRequirement( "When converting a single UTF-8 value to UTF-16BE: There was not exactly one character." );
204 iconv_t converter
= Shapes::Helpers::requireUTF8ToUCS4Converter( );
// iconv input: all bytes of code. iconv output: at most BUF_SIZE bytes.
206 const char * inbuf
= code
;
207 size_t inbytesleft
= inbytesMax
;
209 size_t outbytesleft
= BUF_SIZE
;
210 // The ICONV_CAST macro is defined in config.h.
211 size_t count
= iconv( converter
,
212 ICONV_CAST( & inbuf
), & inbytesleft
,
213 & outbuf
, & outbytesleft
);
// iconv reports failure by returning (size_t)(-1); errno then identifies the cause.
214 if( count
== (size_t)(-1) )
216 if( errno
== EINVAL
)
218 throw Shapes::Exceptions::ExternalError( "The single UTF-8 character to be converted to UTF-16BE was incomplete." );
220 else if( errno
== EILSEQ
)
222 throw Shapes::Exceptions::ExternalError( "An invalid UTF-8 byte was encountered." );
224 else if( errno
== E2BIG
)
226 throw Shapes::Exceptions::InternalError( "The buffer allocated for UTF-8 to UTF-16BE conversion was too small." );
// Any other errno value is reported together with its numeric value.
230 std::ostringstream msg
;
231 msg
<< "iconv failed with an unrecognized error code: " << errno
;
232 throw Shapes::Exceptions::InternalError( strrefdup( msg
) );
// iconv advances outbuf, so the distance from buf is the number of bytes written.
235 size_t bytesUsed
= outbuf
- buf
;
238 throw Shapes::Exceptions::ExternalError( "Conversion of one UTF-8 character to UTF-16BE resulted in more than 8 bytes." );
241 // Next we proceed in two steps. I can't see what the problem here is, but it could be some alignment stuff...
242 // 1) Place in most significant bytes of a UnicodeType, with crap to the left.
// The first sizeof( UnicodeType ) bytes of buf are read as one UnicodeType and shifted right
// by 8 bits for every byte that the conversion did not produce.
// NOTE(review): the result of reading buf this way depends on the byte order of the host,
// and buf is a char array, so alignment is not guaranteed -- confirm.
243 UnicodeType codeUCS4
= *reinterpret_cast< const UnicodeType
* >( buf
) >> ( 8 * ( sizeof( UnicodeType
) - bytesUsed
) );
244 return UCS4_to_name( codeUCS4
, dst
);
// Looks up the code point registered for the glyph name `name`.
//
// Parameters:
//   name  The glyph name to search for in nameMap_.
//   dst   Out-parameter intended for the code point.
248 GlyphList::name_to_UCS4( const char * name
, UnicodeType
* dst
) const
// typeof is a GNU extension; MapType is the type of nameMap_.
250 typedef typeof nameMap_ MapType
;
251 MapType::const_iterator i
= nameMap_
.find( name
);
// Reaching end( ) means that the name is not present in nameMap_.
252 if( i
== nameMap_
.end( ) )
// Reads glyph list entries from iFile and fills the lookup structures of this object.
//
// For each line that is processed:
//   - the ';' delimiter is located and the name is duplicated with strdup,
//   - the text after the delimiter is converted to a code with glyphlist_strtol,
//   - the name is recorded in nameMem_,
//   - the code --> name mapping is stored in namePtrs_ (codes below TABLE_SIZE) or in
//     namePtrsWide_ (all other codes),
//   - the name --> code mapping is stored in nameMap_.
// Processing continues until a line starting with '#' is found.
//
// String literals thrown by glyphlist_strtol are reported on std::cerr and otherwise ignored.
// Throws Shapes::Exceptions::InternalError on unexpected failures.
261 GlyphList::readfile( std::istream
& iFile
)
// NOTE(review): BUF_SIZE is not const, which makes buf a variable-length array (a compiler
// extension in C++) -- confirm that this is intended.
263 size_t BUF_SIZE
= 255;
264 char buf
[ BUF_SIZE
];
266 iconv_t converterUTF16BEToUCS4
= Shapes::Helpers::requireUTF16BEToUCS4Converter( );
// Consume characters up to and including the next newline.
272 for( iFile
.get( c
); c
!= '\n'; iFile
.get( c
) )
// The rest of the line is stored starting at buf[ 1 ].
// NOTE(review): presumably buf[ 0 ] has already been filled with the first character of the line -- confirm.
278 iFile
.getline( buf
+ 1, BUF_SIZE
- 1 );
279 while( buf
[ 0 ] != '#' )
// NOTE(review): strchr returns a null pointer if the line has no ';' -- confirm that this cannot happen.
281 char * delim
= strchr( buf
, ';' );
// The duplicated string is owned by this object from here on; it is recorded in nameMem_ below.
283 char * name
= strdup( buf
);
284 char * end
; // Not const, since strtol wants it that way.
// Convert the text following the delimiter.
289 code
= glyphlist_strtol( delim
+ 1, & end
, converterUTF16BEToUCS4
);
292 // This is a glyph name that is mapped to a sequence of characters.
293 // It is not what we want.
294 iFile
.getline( buf
, BUF_SIZE
);
// String literals are what glyphlist_strtol throws; report, then move on to the next line.
298 catch( const char * ball
)
300 std::cerr
<< "When dealing with \"" << name
<< "\", the following error (ignored) occurred: "
301 << ball
<< std::endl
;
302 iFile
.getline( buf
, BUF_SIZE
);
305 catch( const Shapes::Exceptions::Exception
& ball
)
311 std::ostringstream msg
;
312 msg
<< "Failed to catch ball. name: " << name
;
313 throw Shapes::Exceptions::InternalError( msg
);
// After a successful conversion, end must be at the end of the line ('\0', or '\r' for files
// with CR LF line endings); anything else is treated as an error in the file.
316 if( code
!= 0 && *end
!= '\0' && *end
!= '\r' )
318 std::ostringstream oss
;
319 oss
<< "An error in the glyphlist file was found near the character \"" << name
<< "\"." ;
323 // if( code != 0 && code < 50 )
325 // std::cerr << "Found small character: " << code << " --> " << name << std::endl ;
328 nameMem_
.push_back( name
);
// Small codes are stored in the direct lookup table.
329 if( code
< TABLE_SIZE
)
// A non-zero entry means that another name has already been registered for this code.
331 if( namePtrs_
[ code
] != 0 )
333 if( strncmp( namePtrs_
[ code
], "afii", 4 ) == 0 ||
334 strlen( namePtrs_
[ code
] ) < strlen( name
) )
336 // Names starting with "afii", or that are shorter than their alternatives are overridden.
337 namePtrs_
[ code
] = name
;
341 // std::cerr << "Discarding name with code " << code
342 // << ", kept name: " << namePtrs_[ code ]
343 // << ", new name (discarded): " << name << std::endl ;
// First name seen for this code.
348 namePtrs_
[ code
] = name
;
// Large codes are stored in the associative container. (typeof is a GNU extension.)
353 typedef typeof namePtrsWide_ MapType
;
354 MapType::iterator i
= namePtrsWide_
.find( code
);
355 if( i
!= namePtrsWide_
.end( ) )
357 if( strncmp( i
->second
, "afii", 4 ) == 0 ||
358 strlen( i
->second
) < strlen( name
) )
360 // Names starting with "afii", or that are shorter than their alternatives are overridden.
// NOTE(review): if namePtrsWide_ is a std::map, insert does not replace the value of a key that
// is already present, so this call would leave the old name in place. Assigning to i->second
// would override it -- confirm and fix.
361 namePtrsWide_
.insert( i
, MapType::value_type( code
, name
) );
365 // std::cerr << "Discarding name with code " << code
366 // << ", kept name: " << i->second
367 // << ", new name (discarded): " << name << std::endl ;
// First name seen for this code.
372 namePtrsWide_
[ code
] = name
;
// Reverse mapping, from name to code.
375 nameMap_
[ name
] = code
;
// Fetch the next line for the next iteration.
376 iFile
.getline( buf
, BUF_SIZE
);
// Returns the number of names stored in nameMem_.
381 GlyphList::size( ) const
383 return nameMem_
.size( );