1 /* This file is part of Shapes.
3 * Shapes is free software: you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation, either version 3 of the License, or
8 * Shapes is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with Shapes. If not, see <http://www.gnu.org/licenses/>.
16 * Copyright 2008, 2010 Henrik Tidefelt
19 #include "glyphlist.h"
20 #include "charconverters.h"
21 #include "autoonoff.h"
22 #include "shapesexceptions.h"
23 #include "charconverters.h"
24 #include "utf8tools.h"
30 #include <errno.h> // How come iconv is not enough?
34 using namespace FontMetrics
;
36 // This function is rather special, since it assumes that numbers are written in groups
37 // of four characters, separated by exactly one space.
39 // If this fails since we're not given a singleton character, this is indicated by setting
40 // *end = src and returning 0.
// Decodes one code point.  Values parsed by strtol in base 16 are stored through dst into
// bufUTF16BE, and iconv then converts that buffer with converterUTF16BEToUCS4 into a
// UnicodeType (res).  Conversion failures are reported by throwing a string literal
// (const char *).
//
//   src  - input text; see the contract described in the comment above.
//   end  - handed to strtol, which stores there the position where its parsing stopped.
//   converterUTF16BEToUCS4 - iconv descriptor used for the final conversion.
//
// NOTE(review): presumably the function returns res on success -- confirm.
41 FontMetrics::GlyphList::UnicodeType
42 glyphlist_strtol( char * src
, char ** end
, iconv_t converterUTF16BEToUCS4
)
44 const size_t BUF_SIZE
= 16;
// Static storage: the same buffer is reused by every call of this function.
45 static char bufUTF16BE
[ BUF_SIZE
];
// Number of input bytes offered to iconv; starts at zero.
// NOTE(review): presumably incremented as bytes are stored in bufUTF16BE -- confirm.
49 size_t inbytesleft
= 0;
// Write position inside bufUTF16BE, viewed as unsigned bytes.
53 unsigned char * dst
= reinterpret_cast< unsigned char * >( bufUTF16BE
);
56 // Read most significant byte
61 *dst
= strtol( buf
, end
, 16 );
64 // Read least significant byte
69 *dst
= strtol( buf
, end
, 16 );
76 // end already points at the end.
// iconv advances the pointer it is given, so a separate cursor is used for the input buffer.
83 const char * inbuf
= bufUTF16BE
;
// The conversion result is written directly into the bytes of res.
85 FontMetrics::GlyphList::UnicodeType res
;
86 char * outbuf
= reinterpret_cast< char * >( & res
);;
87 size_t outbytesleft
= sizeof( FontMetrics::GlyphList::UnicodeType
);
89 // The ICONV_CAST macro is defined in config.h.
90 size_t count
= iconv( converterUTF16BEToUCS4
,
91 ICONV_CAST( & inbuf
), & inbytesleft
,
92 & outbuf
, & outbytesleft
);
// iconv signals failure by returning (size_t)(-1) and setting errno.
// NOTE(review): confirm which errno value guards the "illegal value" throw below (EILSEQ is
// the usual code for an invalid input sequence).
// NOTE(review): "do to" in the messages below looks like a typo for "due to".
93 if( count
== (size_t)(-1) )
97 throw "Conversion from UTF-16BE to UCS-4 failed do to illegal value.";
99 else if( errno
== EINVAL
)
101 throw "Conversion from UTF-16BE to UCS-4 failed do to incomplete value.";
103 else if( errno
== E2BIG
)
110 throw "Conversion from UTF-16BE to UCS-4 failed with an unrecognized error code.";
// outbytesleft started at sizeof( UnicodeType ); a non-zero remainder means iconv did not
// fill every byte of res.
// NOTE(review): "produced to output" looks like a typo for "produced no output".
113 if( outbytesleft
!= 0 )
115 throw "Conversion from UTF-16BE to UCS-4 produced to output.";
// Constructs a glyph list from the stream iFile.
// NOTE(review): presumably iFile is passed on to readfile( ) -- confirm.
120 GlyphList::GlyphList( std::istream
& iFile
)
// Give the direct lookup table TABLE_SIZE entries, each initialised to 0.  readfile( ) treats
// a 0 entry as "no name registered for this code".
122 namePtrs_
.resize( TABLE_SIZE
, 0 );
// Destructor.
// NOTE(review): readfile( ) stores strings obtained from strdup in nameMem_; confirm that they
// are released here.
126 GlyphList::~GlyphList( )
// Looks up the glyph name registered for a code point.
//
//   code - the code point to look up.
//   dst  - receives the name pointer for codes below TABLE_SIZE.
//
// NOTE(review): presumably returns a success flag and also writes *dst in the wide-map
// branch -- confirm.
130 GlyphList::UCS4_to_name( UnicodeType code
, const char ** dst
) const
// Codes below TABLE_SIZE are served from the directly indexed table.  The entry is 0 when no
// name was registered, since the constructor fills the table with 0.
132 if( code
< TABLE_SIZE
)
134 *dst
= namePtrs_
[ code
];
// Remaining codes are looked up in the associative container namePtrsWide_.
// NOTE(review): typeof is a GNU extension; decltype is the standard spelling.
138 typedef typeof namePtrsWide_ MapType
;
139 MapType::const_iterator i
= namePtrsWide_
.find( code
);
// find( ) returning end( ) means that the code has no entry.
140 if( i
== namePtrsWide_
.end( ) )
// Looks up the glyph name of a single character given as a UTF-8 string: the string is
// validated, converted with iconv into a static buffer, reinterpreted as a UnicodeType, and
// the result is passed to UCS4_to_name.
//
//   code - null-terminated UTF-8 text, expected to hold exactly one character.
//   dst  - forwarded to UCS4_to_name.
//
// Throws MiscellaneousRequirement for empty, over-long or multi-character input, and
// ExternalError or InternalError when the conversion fails.
150 GlyphList::UTF8_to_name( const char * code
, const char ** dst
) const
152 // I've read that a UTF-8 value may occupy 6 bytes. Hence assume we get no more than 6 bytes of input.
153 // Then forget that this should be just one character. Then this could be 6 single byte code points.
154 // Since it seems like a UTF-16BE value may occupy 8 bytes (at least there are numbers this big in glyphlist.txt),
155 // 6 * 8 = 48 bytes should be enough.
156 const size_t BUF_SIZE
= 64;
// Static storage: the same buffer is reused by every call of this function.
157 static char buf
[ BUF_SIZE
];
161 throw Shapes::Exceptions::MiscellaneousRequirement( "When converting a single UTF-8 value to UTF-16BE: The source value is empty." );
164 size_t inbytesMax
= strlen( code
);
// With BUF_SIZE = 64 this rejects any input longer than 8 bytes.
165 if( 8 * inbytesMax
> BUF_SIZE
) // 8 for the size of a UTF-16BE code point.
167 throw Shapes::Exceptions::MiscellaneousRequirement( "When converting a single UTF-8 value to UTF-16BE: This many bytes definitely represents more than one UTF-8 character." );
171 // Make sure there is just one character in the string.
// Every byte is tested with utf8leadByte.
// NOTE(review): presumably lead bytes are counted and the throw below fires unless the
// count is one -- confirm.
173 for( const char * src
= code
; *src
!= '\0'; ++src
)
175 if( Shapes::Helpers::utf8leadByte( *src
) )
182 throw Shapes::Exceptions::MiscellaneousRequirement( "When converting a single UTF-8 value to UTF-16BE: There was not exactly one character." );
// NOTE(review): the helper is named ...UTF8ToUCS4..., while the comments and messages in this
// function speak of UTF-16BE -- confirm which target encoding is intended.
187 iconv_t converter
= Shapes::Helpers::requireUTF8ToUCS4Converter( );
// iconv advances the pointer it is given, so a separate cursor is used for the input.
189 const char * inbuf
= code
;
190 size_t inbytesleft
= inbytesMax
;
192 size_t outbytesleft
= BUF_SIZE
;
193 // The ICONV_CAST macro is defined in config.h.
194 size_t count
= iconv( converter
,
195 ICONV_CAST( & inbuf
), & inbytesleft
,
196 & outbuf
, & outbytesleft
);
// iconv signals failure by returning (size_t)(-1) and setting errno; each known errno value
// is mapped to its own exception below, and anything else becomes an InternalError that
// carries the numeric code.
197 if( count
== (size_t)(-1) )
199 if( errno
== EINVAL
)
201 throw Shapes::Exceptions::ExternalError( "The single UTF-8 character to be converted to UTF-16BE was incomplete." );
203 else if( errno
== EILSEQ
)
205 throw Shapes::Exceptions::ExternalError( "An invalid UTF-8 byte was encountered." );
207 else if( errno
== E2BIG
)
209 throw Shapes::Exceptions::InternalError( "The buffer allocated for UTF-8 to UTF-16BE conversion was too small." );
213 std::ostringstream msg
;
214 msg
<< "iconv failed with an unrecognized error code: " << errno
;
215 throw Shapes::Exceptions::InternalError( strrefdup( msg
) );
// Number of bytes that iconv wrote, measured from the start of buf.
218 size_t bytesUsed
= outbuf
- buf
;
221 throw Shapes::Exceptions::ExternalError( "Conversion of one UTF-8 character to UTF-16BE resulted in more than 8 bytes." );
224 // Next we proceed in two steps. I can't see what the problem here is, but it could be some alignment stuff...
225 // 1) Place in most significant bytes of a UnicodeType, with crap to the left.
// The first sizeof( UnicodeType ) bytes of buf are read as one UnicodeType and shifted right
// by 8 bits for every byte that iconv did not write.
// NOTE(review): the result depends on the byte order of the host and on the alignment of
// buf -- confirm on every supported platform.
226 UnicodeType codeUCS4
= *reinterpret_cast< const UnicodeType
* >( buf
) >> ( 8 * ( sizeof( UnicodeType
) - bytesUsed
) );
227 return UCS4_to_name( codeUCS4
, dst
);
// Looks up the code point registered for a glyph name.
//
//   name - the glyph name used as key in nameMap_.
//   dst  - output location for the code point.
//
// NOTE(review): presumably returns a success flag and writes *dst when the name is found --
// confirm.
231 GlyphList::name_to_UCS4( const char * name
, UnicodeType
* dst
) const
// NOTE(review): typeof is a GNU extension; decltype is the standard spelling.
233 typedef typeof nameMap_ MapType
;
234 MapType::const_iterator i
= nameMap_
.find( name
);
// find( ) returning end( ) means that the name is unknown.
235 if( i
== nameMap_
.end( ) )
// Reads glyph list entries from iFile and fills namePtrs_, namePtrsWide_, nameMap_ and
// nameMem_.  Each entry is split at ';' and the text after it is parsed by glyphlist_strtol.
// Entries are processed until a line starting with '#' is met.
//
//   iFile - the stream holding the glyph list.
//
// Problems in an entry reported as const char * are turned into a warning or an ExternalError
// through WARN_OR_THROW; unexpected trailing text and unhandled exception types end in other
// exceptions.
244 GlyphList::readfile( std::istream
& iFile
)
// NOTE(review): BUF_SIZE is not const, so buf below is a variable-length array, which is a
// compiler extension in C++.
246 size_t BUF_SIZE
= 255;
247 char buf
[ BUF_SIZE
];
249 iconv_t converterUTF16BEToUCS4
= Shapes::Helpers::requireUTF16BEToUCS4Converter( );
// Consume characters up to and including the next newline.
// NOTE(review): the condition does not test the stream state; confirm that this terminates
// when the stream ends without a newline.
255 for( iFile
.get( c
); c
!= '\n'; iFile
.get( c
) )
// The rest of the line is stored from buf[ 1 ] onwards.
// NOTE(review): presumably buf[ 0 ] already holds the first character of the line -- confirm.
261 iFile
.getline( buf
+ 1, BUF_SIZE
- 1 );
// One iteration per entry; a line whose first character is '#' ends the loop.
262 while( buf
[ 0 ] != '#' )
// The ';' separates the glyph name from its code.
264 char * delim
= strchr( buf
, ';' );
// The name is copied with strdup, and the copy is what the tables refer to further down.
// NOTE(review): presumably *delim has been set to '\0' first, so that only the name is
// copied -- confirm.
// NOTE(review): confirm that this copy is freed on the paths below that skip the entry.
266 char * name
= strdup( buf
);
267 char * end
; // Not const, since strtol wants it that way.
272 code
= glyphlist_strtol( delim
+ 1, & end
, converterUTF16BEToUCS4
);
275 // This is a glyph name that is mapped to a sequence of characters.
276 // It is not what we want.
277 iFile
.getline( buf
, BUF_SIZE
);
// glyphlist_strtol reports conversion problems by throwing a string literal.
281 catch( const char * ball
)
283 std::ostringstream msg
;
284 msg
<< "Error in glyph list. When dealing with \"" << name
<< "\", the following error occurred: "
286 WARN_OR_THROW( Shapes::Exceptions::ExternalError( strrefdup( msg
), true ) );
// Read the next line before continuing.
287 iFile
.getline( buf
, BUF_SIZE
);
290 catch( const Shapes::Exceptions::Exception
& ball
)
296 std::ostringstream msg
;
297 msg
<< "Failed to catch ball. name: " << name
;
298 throw Shapes::Exceptions::InternalError( msg
);
// After a non-zero code, the parse position must be at the end of the string or at a
// carriage return; anything else is treated as an error in the file.
301 if( code
!= 0 && *end
!= '\0' && *end
!= '\r' )
303 std::ostringstream oss
;
304 oss
<< "An error in the glyphlist file was found near the character \"" << name
<< "\"." ;
// Every name that reaches this point is recorded in nameMem_.
308 nameMem_
.push_back( name
);
// Codes below TABLE_SIZE go into the directly indexed table.
309 if( code
< TABLE_SIZE
)
// A non-zero entry means that a name has already been registered for this code.
311 if( namePtrs_
[ code
] != 0 )
313 if( strncmp( namePtrs_
[ code
], "afii", 4 ) == 0 ||
314 strlen( namePtrs_
[ code
] ) < strlen( name
) )
316 // Names starting with "afii", or that are shorter than their alternatives are overridden.
317 namePtrs_
[ code
] = name
;
321 // std::ostringstream msg;
322 // std::msg << "Conflict in glyph list. Discarding name with code " << code
323 // << ", kept name: " << namePtrs_[ code ]
324 // << ", new name (discarded): " << name ;
325 // WARN_OR_THROW( Shapes::Exceptions::ExternalError( strrefdup( msg ), true ) );
330 namePtrs_
[ code
] = name
;
// All other codes go into the associative container namePtrsWide_.
// NOTE(review): typeof is a GNU extension; decltype is the standard spelling.
335 typedef typeof namePtrsWide_ MapType
;
336 MapType::iterator i
= namePtrsWide_
.find( code
);
// An iterator different from end( ) means that the code already has a name.
337 if( i
!= namePtrsWide_
.end( ) )
339 if( strncmp( i
->second
, "afii", 4 ) == 0 ||
340 strlen( i
->second
) < strlen( name
) )
342 // Names starting with "afii", or that are shorter than their alternatives are overridden.
// NOTE(review): if namePtrsWide_ is a std::map, insert( ) leaves an element with an existing
// key unchanged, so the old name would stay in place; assigning to i->second would replace
// it -- confirm the container type and the intent.
343 namePtrsWide_
.insert( i
, MapType::value_type( code
, name
) );
347 // std::ostringstream msg;
348 // std::msg << "Conflict in glyph list. Discarding name with code " << code
349 // << ", kept name: " << i->second
350 // << ", new name (discarded): " << name ;
351 // WARN_OR_THROW( Shapes::Exceptions::ExternalError( strrefdup( msg ), true ) );
356 namePtrsWide_
[ code
] = name
;
// Register the reverse mapping from name to code.
359 nameMap_
[ name
] = code
;
// Read the next line for the following iteration.
360 iFile
.getline( buf
, BUF_SIZE
);
// Returns the number of glyph name strings held in nameMem_.
365 GlyphList::size( ) const
367 return nameMem_
.size( );