Update procedures
[shapes.git] / source / glyphlist.cc
blob1e0fd085a231e563362b522edb831653afc46f59
1 /* This file is part of Shapes.
3 * Shapes is free software: you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation, either version 3 of the License, or
6 * any later version.
8 * Shapes is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with Shapes. If not, see <http://www.gnu.org/licenses/>.
16 * Copyright 2008, 2010 Henrik Tidefelt
19 #include "glyphlist.h"
20 #include "charconverters.h"
21 #include "autoonoff.h"
22 #include "shapesexceptions.h"
23 #include "charconverters.h"
24 #include "utf8tools.h"
25 #include "warn.h"
26 #include "config.h"
28 #include <cstring>
29 #include <sstream>
30 #include <errno.h> // How come iconv is not enough?
31 #include <iomanip>
32 #include <cstdlib>
34 using namespace FontMetrics;
36 // This function is rather special, since it assumes that numbers are written in groups
37 // of four characters, separated by exactly one space.
38 // Use it with care!
39 // If this fails since we're not given a singleton character, this is indicated by setting
40 // *end = src and returning 0.
41 FontMetrics::GlyphList::UnicodeType
42 glyphlist_strtol( char * src, char ** end, iconv_t converterUTF16BEToUCS4 )
44 const size_t BUF_SIZE = 16;
45 static char bufUTF16BE[ BUF_SIZE ];
47 char * start = src;
49 size_t inbytesleft = 0;
51 char buf[ 4 ];
52 buf[ 2 ] = '\0';
53 unsigned char * dst = reinterpret_cast< unsigned char * >( bufUTF16BE );
54 while( true )
56 // Read most significant byte
57 buf[ 0 ] = *src;
58 ++src;
59 buf[ 1 ] = *src;
60 ++src;
61 *dst = strtol( buf, end, 16 );
62 ++dst;
64 // Read least significant byte
65 buf[ 0 ] = *src;
66 ++src;
67 buf[ 1 ] = *src;
68 ++src;
69 *dst = strtol( buf, end, 16 );
70 ++dst;
72 inbytesleft += 2;
74 if( *src != ' ' )
76 // end already points at the end.
77 break;
79 ++src;
83 const char * inbuf = bufUTF16BE;
85 FontMetrics::GlyphList::UnicodeType res;
86 char * outbuf = reinterpret_cast< char * >( & res );;
87 size_t outbytesleft = sizeof( FontMetrics::GlyphList::UnicodeType );
89 // The ICONV_CAST macro is defined in config.h.
90 size_t count = iconv( converterUTF16BEToUCS4,
91 ICONV_CAST( & inbuf ), & inbytesleft,
92 & outbuf, & outbytesleft );
93 if( count == (size_t)(-1) )
95 if( errno == EILSEQ )
97 throw "Conversion from UTF-16BE to UCS-4 failed do to illegal value.";
99 else if( errno == EINVAL )
101 throw "Conversion from UTF-16BE to UCS-4 failed do to incomplete value.";
103 else if( errno == E2BIG )
105 *end = start;
106 return 0;
108 else
110 throw "Conversion from UTF-16BE to UCS-4 failed with an unrecognized error code.";
113 if( outbytesleft != 0 )
115 throw "Conversion from UTF-16BE to UCS-4 produced to output.";
117 return res;
120 GlyphList::GlyphList( std::istream & iFile )
122 namePtrs_.resize( TABLE_SIZE, 0 );
123 readfile( iFile );
126 GlyphList::~GlyphList( )
129 bool
130 GlyphList::UCS4_to_name( UnicodeType code, const char ** dst ) const
132 if( code < TABLE_SIZE )
134 *dst = namePtrs_[ code ];
135 return *dst != 0;
138 typedef typeof namePtrsWide_ MapType;
139 MapType::const_iterator i = namePtrsWide_.find( code );
140 if( i == namePtrsWide_.end( ) )
142 return false;
145 *dst = i->second;
146 return true;
149 bool
150 GlyphList::UTF8_to_name( const char * code, const char ** dst ) const
152 // I've read that a UTF-8 value may occupy 6 bytes. Hence assume we get no more than 6 bytes of input.
153 // Then forget that this should be just one character. Then this could be 6 single byte code points.
154 // Since it seems like a UTF-16BE value my occupy 8 bytes (at least there are this big numbers in glyphlist.txt),
155 // 6 * 8 = 48 bytes should be enough.
156 const size_t BUF_SIZE = 64;
157 static char buf[ BUF_SIZE ];
159 if( *code == '\0' )
161 throw Shapes::Exceptions::MiscellaneousRequirement( "When converting a single UTF-8 value to UTF-16BE: The source value is empty." );
164 size_t inbytesMax = strlen( code );
165 if( 8 * inbytesMax > BUF_SIZE ) // 8 for the size of a UTF-16BE code point.
167 throw Shapes::Exceptions::MiscellaneousRequirement( "When converting a single UTF-8 value to UTF-16BE: This many bytes definitely represents more than one UTF-8 character." );
171 // Make sure there is just one character in the string.
172 size_t count = 0;
173 for( const char * src = code; *src != '\0'; ++src )
175 if( Shapes::Helpers::utf8leadByte( *src ) )
177 ++count;
180 if( count != 1 )
182 throw Shapes::Exceptions::MiscellaneousRequirement( "When converting a single UTF-8 value to UTF-16BE: There was not exactly one character." );
187 iconv_t converter = Shapes::Helpers::requireUTF8ToUCS4Converter( );
189 const char * inbuf = code;
190 size_t inbytesleft = inbytesMax;
191 char * outbuf = buf;
192 size_t outbytesleft = BUF_SIZE;
193 // The ICONV_CAST macro is defined in config.h.
194 size_t count = iconv( converter,
195 ICONV_CAST( & inbuf ), & inbytesleft,
196 & outbuf, & outbytesleft );
197 if( count == (size_t)(-1) )
199 if( errno == EINVAL )
201 throw Shapes::Exceptions::ExternalError( "The single UTF-8 character to be converted to UTF-16BE was incomplete." );
203 else if( errno == EILSEQ )
205 throw Shapes::Exceptions::ExternalError( "An invalid UTF-8 byte was encountered." );
207 else if( errno == E2BIG )
209 throw Shapes::Exceptions::InternalError( "The buffer allocated for UTF-8 to UTF-16BE conversion was too small." );
211 else
213 std::ostringstream msg;
214 msg << "iconv failed with an unrecognized error code: " << errno ;
215 throw Shapes::Exceptions::InternalError( strrefdup( msg ) );
218 size_t bytesUsed = outbuf - buf;
219 if( bytesUsed > 8 )
221 throw Shapes::Exceptions::ExternalError( "Conversion of one UTF-8 character to UTF-16BE resulted in more than 8 bytes." );
224 // Next we proceed in two steps. I can't see what the probelm here is, but it could be some alignment stuff...
225 // 1) Place in most significant bytes of a UnicodeType, with crap to the left.
226 UnicodeType codeUCS4 = *reinterpret_cast< const UnicodeType * >( buf ) >> ( 8 * ( sizeof( UnicodeType ) - bytesUsed ) );
227 return UCS4_to_name( codeUCS4, dst );
230 bool
231 GlyphList::name_to_UCS4( const char * name, UnicodeType * dst ) const
233 typedef typeof nameMap_ MapType;
234 MapType::const_iterator i = nameMap_.find( name );
235 if( i == nameMap_.end( ) )
237 return false;
239 *dst = i->second;
240 return true;
243 void
244 GlyphList::readfile( std::istream & iFile )
246 size_t BUF_SIZE = 255;
247 char buf[ BUF_SIZE ];
249 iconv_t converterUTF16BEToUCS4 = Shapes::Helpers::requireUTF16BEToUCS4Converter( );
251 char c;
252 iFile.get( c );
253 while( c == '#' )
255 for( iFile.get( c ); c != '\n'; iFile.get( c ) )
257 iFile.get( c );
260 buf[ 0 ] = c;
261 iFile.getline( buf + 1, BUF_SIZE - 1 );
262 while( buf[ 0 ] != '#' )
264 char * delim = strchr( buf, ';' );
265 *delim = '\0';
266 char * name = strdup( buf );
267 char * end; // Not const, since strtol want's it that way.
269 UnicodeType code;
272 code = glyphlist_strtol( delim + 1, & end, converterUTF16BEToUCS4 );
273 if( code == 0 )
275 // This is a glyph name that is mapped to a sequence of characters.
276 // It is not what we want.
277 iFile.getline( buf, BUF_SIZE );
278 continue;
281 catch( const char * ball )
283 std::ostringstream msg;
284 msg << "Error in glyph list. When dealing with \"" << name << "\", the following error occurred: "
285 << ball ;
286 WARN_OR_THROW( Shapes::Exceptions::ExternalError( strrefdup( msg ), true ) );
287 iFile.getline( buf, BUF_SIZE );
288 continue;
290 catch( const Shapes::Exceptions::Exception & ball )
292 throw;
294 catch( ... )
296 std::ostringstream msg;
297 msg << "Failed to catch ball. name: " << name ;
298 throw Shapes::Exceptions::InternalError( msg );
301 if( code != 0 && *end != '\0' && *end != '\r' )
303 std::ostringstream oss;
304 oss << "An error in the glyphlist file was found near the character \"" << name << "\"." ;
305 throw oss.str( );
308 nameMem_.push_back( name );
309 if( code < TABLE_SIZE )
311 if( namePtrs_[ code ] != 0 )
313 if( strncmp( namePtrs_[ code ], "afii", 4 ) == 0 ||
314 strlen( namePtrs_[ code ] ) < strlen( name ) )
316 // Names starting with "afii", or that are shorter than their alternatives are overridden.
317 namePtrs_[ code ] = name;
319 // else
320 // {
321 // std::ostringstream msg;
322 // std::msg << "Conflict in glyph list. Discarding name with code " << code
323 // << ", kept name: " << namePtrs_[ code ]
324 // << ", new name (discarded): " << name ;
325 // WARN_OR_THROW( Shapes::Exceptions::ExternalError( strrefdup( msg ), true ) );
326 // }
328 else
330 namePtrs_[ code ] = name;
333 else
335 typedef typeof namePtrsWide_ MapType;
336 MapType::iterator i = namePtrsWide_.find( code );
337 if( i != namePtrsWide_.end( ) )
339 if( strncmp( i->second, "afii", 4 ) == 0 ||
340 strlen( i->second ) < strlen( name ) )
342 // Names starting with "afii", or that are shorter than their alternatives are overridden.
343 namePtrsWide_.insert( i, MapType::value_type( code, name ) );
345 // else
346 // {
347 // std::ostringstream msg;
348 // std::msg << "Conflict in glyph list. Discarding name with code " << code
349 // << ", kept name: " << i->second
350 // << ", new name (discarded): " << name ;
351 // WARN_OR_THROW( Shapes::Exceptions::ExternalError( strrefdup( msg ), true ) );
352 // }
354 else
356 namePtrsWide_[ code ] = name;
359 nameMap_[ name ] = code;
360 iFile.getline( buf, BUF_SIZE );
364 size_t
365 GlyphList::size( ) const
367 return nameMem_.size( );