Updating the changelog in the VERSION file, and version_sync.
[shapes.git] / source / glyphlist.cc
blob93b320a3a235ad96ab3d423141663b694087f161
1 /* This file is part of Shapes.
3 * Shapes is free software: you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation, either version 3 of the License, or
6 * any later version.
8 * Shapes is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with Shapes. If not, see <http://www.gnu.org/licenses/>.
16 * Copyright 2008 Henrik Tidefelt
19 #include "glyphlist.h"
20 #include "charconverters.h"
21 #include "autoonoff.h"
22 #include "shapesexceptions.h"
23 #include "charconverters.h"
24 #include "utf8tools.h"
25 #include "config.h"
27 #include <cstring>
28 #include <sstream>
29 #include <errno.h> // How come iconv is not enough?
30 #include <iomanip>
31 #include <cstdlib>
33 using namespace FontMetrics;
35 // This function is rather special, since it assumes that numbers are written in groups
36 // of four characters, separated by exactly one space.
37 // Use it with care!
38 // If this fails since we're not given a singleton character, this is indicated by setting
39 // *end = src and returning 0.
40 FontMetrics::GlyphList::UnicodeType
41 glyphlist_strtol( char * src, char ** end, iconv_t converterUTF16BEToUCS4 )
43 const size_t BUF_SIZE = 16;
44 static char bufUTF16BE[ BUF_SIZE ];
46 char * start = src;
48 // std::cerr << "src: " ;
49 // for( const char * s = src; *s != '\0' && *s != '\r'; ++s )
50 // {
51 // std::cerr << *s ;
52 // }
54 size_t inbytesleft = 0;
56 char buf[ 4 ];
57 buf[ 2 ] = '\0';
58 unsigned char * dst = reinterpret_cast< unsigned char * >( bufUTF16BE );
59 while( true )
61 // Read most significant byte
62 buf[ 0 ] = *src;
63 ++src;
64 buf[ 1 ] = *src;
65 ++src;
66 *dst = strtol( buf, end, 16 );
67 ++dst;
69 // Read least significant byte
70 buf[ 0 ] = *src;
71 ++src;
72 buf[ 1 ] = *src;
73 ++src;
74 *dst = strtol( buf, end, 16 );
75 ++dst;
77 inbytesleft += 2;
79 if( *src != ' ' )
81 // end already points at the end.
82 break;
84 ++src;
88 // std::cerr << " (" << inbytesleft << " bytes: " ;
89 // for( size_t i = 0; i < inbytesleft; i += 2 )
90 // {
91 // std::cerr << std::setiosflags(std::ios::hex) << *reinterpret_cast< uint16_t * >( bufUTF16BE + i )
92 // << std::resetiosflags(std::ios::hex) ;
93 // }
94 // std::cerr << ")" ;
96 const char * inbuf = bufUTF16BE;
98 FontMetrics::GlyphList::UnicodeType res;
99 char * outbuf = reinterpret_cast< char * >( & res );;
100 size_t outbytesleft = sizeof( FontMetrics::GlyphList::UnicodeType );
102 // The ICONV_CAST macro is defined in config.h.
103 size_t count = iconv( converterUTF16BEToUCS4,
104 ICONV_CAST( & inbuf ), & inbytesleft,
105 & outbuf, & outbytesleft );
106 if( count == (size_t)(-1) )
108 if( errno == EILSEQ )
110 throw "Conversion from UTF-16BE to UCS-4 failed do to illegal value.";
112 else if( errno == EINVAL )
114 throw "Conversion from UTF-16BE to UCS-4 failed do to incomplete value.";
116 else if( errno == E2BIG )
118 *end = start;
119 return 0;
121 else
123 throw "Conversion from UTF-16BE to UCS-4 failed with an unrecognized error code.";
126 if( outbytesleft != 0 )
128 throw "Conversion from UTF-16BE to UCS-4 produced to output.";
130 // std::cerr << " --> "
131 // << std::setiosflags(std::ios::hex) << res
132 // << std::resetiosflags(std::ios::hex)
133 // << std::endl ;
134 return res;
137 GlyphList::GlyphList( std::istream & iFile )
139 namePtrs_.resize( TABLE_SIZE, 0 );
140 readfile( iFile );
143 GlyphList::~GlyphList( )
146 bool
147 GlyphList::UCS4_to_name( UnicodeType code, const char ** dst ) const
149 if( code < TABLE_SIZE )
151 *dst = namePtrs_[ code ];
152 return *dst != 0;
155 typedef typeof namePtrsWide_ MapType;
156 MapType::const_iterator i = namePtrsWide_.find( code );
157 if( i == namePtrsWide_.end( ) )
159 return false;
162 *dst = i->second;
163 return true;
166 bool
167 GlyphList::UTF8_to_name( const char * code, const char ** dst ) const
169 // I've read that a UTF-8 value may occupy 6 bytes. Hence assume we get no more than 6 bytes of input.
170 // Then forget that this should be just one character. Then this could be 6 single byte code points.
171 // Since it seems like a UTF-16BE value my occupy 8 bytes (at least there are this big numbers in glyphlist.txt),
172 // 6 * 8 = 48 bytes should be enough.
173 const size_t BUF_SIZE = 64;
174 static char buf[ BUF_SIZE ];
176 if( *code == '\0' )
178 throw Shapes::Exceptions::MiscellaneousRequirement( "When converting a single UTF-8 value to UTF-16BE: The source value is empty." );
181 size_t inbytesMax = strlen( code );
182 if( 8 * inbytesMax > BUF_SIZE ) // 8 for the size of a UTF-16BE code point.
184 throw Shapes::Exceptions::MiscellaneousRequirement( "When converting a single UTF-8 value to UTF-16BE: This many bytes definitely represents more than one UTF-8 character." );
188 // Make sure there is just one character in the string.
189 size_t count = 0;
190 for( const char * src = code; *src != '\0'; ++src )
192 if( Shapes::Helpers::utf8leadByte( *src ) )
194 ++count;
197 if( count != 1 )
199 throw Shapes::Exceptions::MiscellaneousRequirement( "When converting a single UTF-8 value to UTF-16BE: There was not exactly one character." );
204 iconv_t converter = Shapes::Helpers::requireUTF8ToUCS4Converter( );
206 const char * inbuf = code;
207 size_t inbytesleft = inbytesMax;
208 char * outbuf = buf;
209 size_t outbytesleft = BUF_SIZE;
210 // The ICONV_CAST macro is defined in config.h.
211 size_t count = iconv( converter,
212 ICONV_CAST( & inbuf ), & inbytesleft,
213 & outbuf, & outbytesleft );
214 if( count == (size_t)(-1) )
216 if( errno == EINVAL )
218 throw Shapes::Exceptions::ExternalError( "The single UTF-8 character to be converted to UTF-16BE was incomplete." );
220 else if( errno == EILSEQ )
222 throw Shapes::Exceptions::ExternalError( "An invalid UTF-8 byte was encountered." );
224 else if( errno == E2BIG )
226 throw Shapes::Exceptions::InternalError( "The buffer allocated for UTF-8 to UTF-16BE conversion was too small." );
228 else
230 std::ostringstream msg;
231 msg << "iconv failed with an unrecognized error code: " << errno ;
232 throw Shapes::Exceptions::InternalError( strrefdup( msg ) );
235 size_t bytesUsed = outbuf - buf;
236 if( bytesUsed > 8 )
238 throw Shapes::Exceptions::ExternalError( "Conversion of one UTF-8 character to UTF-16BE resulted in more than 8 bytes." );
241 // Next we proceed in two steps. I can't see what the probelm here is, but it could be some alignment stuff...
242 // 1) Place in most significant bytes of a UnicodeType, with crap to the left.
243 UnicodeType codeUCS4 = *reinterpret_cast< const UnicodeType * >( buf ) >> ( 8 * ( sizeof( UnicodeType ) - bytesUsed ) );
244 return UCS4_to_name( codeUCS4, dst );
247 bool
248 GlyphList::name_to_UCS4( const char * name, UnicodeType * dst ) const
250 typedef typeof nameMap_ MapType;
251 MapType::const_iterator i = nameMap_.find( name );
252 if( i == nameMap_.end( ) )
254 return false;
256 *dst = i->second;
257 return true;
260 void
261 GlyphList::readfile( std::istream & iFile )
263 size_t BUF_SIZE = 255;
264 char buf[ BUF_SIZE ];
266 iconv_t converterUTF16BEToUCS4 = Shapes::Helpers::requireUTF16BEToUCS4Converter( );
268 char c;
269 iFile.get( c );
270 while( c == '#' )
272 for( iFile.get( c ); c != '\n'; iFile.get( c ) )
274 iFile.get( c );
277 buf[ 0 ] = c;
278 iFile.getline( buf + 1, BUF_SIZE - 1 );
279 while( buf[ 0 ] != '#' )
281 char * delim = strchr( buf, ';' );
282 *delim = '\0';
283 char * name = strdup( buf );
284 char * end; // Not const, since strtol want's it that way.
286 UnicodeType code;
289 code = glyphlist_strtol( delim + 1, & end, converterUTF16BEToUCS4 );
290 if( code == 0 )
292 // This is a glyph name that is mapped to a sequence of characters.
293 // It is not what we want.
294 iFile.getline( buf, BUF_SIZE );
295 continue;
298 catch( const char * ball )
300 std::cerr << "When dealing with \"" << name << "\", the following error (ignored) occurred: "
301 << ball << std::endl ;
302 iFile.getline( buf, BUF_SIZE );
303 continue;
305 catch( const Shapes::Exceptions::Exception & ball )
307 throw;
309 catch( ... )
311 std::ostringstream msg;
312 msg << "Failed to catch ball. name: " << name ;
313 throw Shapes::Exceptions::InternalError( msg );
316 if( code != 0 && *end != '\0' && *end != '\r' )
318 std::ostringstream oss;
319 oss << "An error in the glyphlist file was found near the character \"" << name << "\"." ;
320 throw oss.str( );
323 // if( code != 0 && code < 50 )
324 // {
325 // std::cerr << "Found small character: " << code << " --> " << name << std::endl ;
326 // }
328 nameMem_.push_back( name );
329 if( code < TABLE_SIZE )
331 if( namePtrs_[ code ] != 0 )
333 if( strncmp( namePtrs_[ code ], "afii", 4 ) == 0 ||
334 strlen( namePtrs_[ code ] ) < strlen( name ) )
336 // Names starting with "afii", or that are shorter than their alternatives are overridden.
337 namePtrs_[ code ] = name;
339 // else
340 // {
341 // std::cerr << "Discarding name with code " << code
342 // << ", kept name: " << namePtrs_[ code ]
343 // << ", new name (discarded): " << name << std::endl ;
344 // }
346 else
348 namePtrs_[ code ] = name;
351 else
353 typedef typeof namePtrsWide_ MapType;
354 MapType::iterator i = namePtrsWide_.find( code );
355 if( i != namePtrsWide_.end( ) )
357 if( strncmp( i->second, "afii", 4 ) == 0 ||
358 strlen( i->second ) < strlen( name ) )
360 // Names starting with "afii", or that are shorter than their alternatives are overridden.
361 namePtrsWide_.insert( i, MapType::value_type( code, name ) );
363 // else
364 // {
365 // std::cerr << "Discarding name with code " << code
366 // << ", kept name: " << i->second
367 // << ", new name (discarded): " << name << std::endl ;
368 // }
370 else
372 namePtrsWide_[ code ] = name;
375 nameMap_[ name ] = code;
376 iFile.getline( buf, BUF_SIZE );
380 size_t
381 GlyphList::size( ) const
383 return nameMem_.size( );