1 /* This file is part of Shapes.
3 * Shapes is free software: you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation, either version 3 of the License, or
8 * Shapes is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with Shapes. If not, see <http://www.gnu.org/licenses/>.
16 * Copyright 2008, 2010 Henrik Tidefelt
21 #include "pdfstructure.h"
22 #include "pdfscanner.h"
23 #include "exitcodes.h"
27 #include <cstdio> // This is a workaround for a bug in Flex.
29 unsigned char hexToChar( char c1, char c2 );
30 unsigned char octalToChar( char c1, char c2, char c3 );
32 size_t stringParenDepth;
34 #define YY_EXIT_FAILURE Shapes::Interaction::EXIT_INTERNAL_ERROR
36 using namespace SimplePDF;
41 Delimiter [\[\]\(\)\{\}\<\>\/\%]
42 Regular [^ \t\n\r\[\]\(\)\{\}\<\>\/\%]
45 DecInteger [+-]?[0-9]+
47 Float [+-]?[0-9]*[.][0-9]*
51 HexString [<]({WhiteSpace}|[0-9A-Fa-f])*[>]
52 ButParentheses ([^()]|(\\(.|\n)))*
54 IndirectRef {PlainInteger}{WhiteSpace}+{PlainInteger}{WhiteSpace}*"R"
55 IndirectDef {PlainInteger}{WhiteSpace}+{PlainInteger}{WhiteSpace}*"obj"
61 %option yyclass="PdfScanner"
69 yylval.pdfObj = new PDF_Int( strtol( yytext, & end, 10 ) );
77 i = strtol( yytext, & end, 10 );
78 v = strtol( end, & end, 10 );
79 yylval.pdfObj = new PDF_Indirect( i, v );
87 i = strtol( yytext, & end, 10 );
88 v = strtol( end, & end, 10 );
89 yylval.pdfR = new PDF_Indirect_in( i, v );
95 yylval.pdfObj = new PDF_Float( strtod( yytext, & end ) );
100 /* Note that we don't parse the internal meaning of the bracketed contents, since this
101 * would only require more work when writing back to a pdf file.
104 const char * src( yytext + 1 );
109 for( ; isblank( *src ); ++src )
115 for( ; isblank( *src ); ++src )
121 str += hexToChar( c1, 0 );
124 str += hexToChar( c1, c2 );
125 for( ; isblank( *src ); ++src )
131 yylval.pdfObj = new PDF_String( str );
133 yytext[ yyleng - 1 ] = '\0';
134 yylval.pdfObj = new PDF_HexString( yytext + 1 );
139 stringParenDepth = 1;
140 BEGIN( StringState );
145 yylval.str = strdup( yytext );
150 if( stringParenDepth > 0 )
152 yylval.str = strdup( yytext );
162 <StringState>{ButParentheses} {
163 /* Note that we don't parse the internal meaning of escape sequences within the string, since this
164 * would only require more work when writing back to a pdf file.
167 char * res( new char[ pdfleng + 1 ] );
168 char * dst( res.getPtr( ) );
169 const char * src( yytext );
214 *(dst++) = octalToChar( c1, c2, c3 );
218 *(dst++) = octalToChar( 0, c1, c2 );
224 *(dst++) = octalToChar( 0, 0, c1 );
245 yylval.str = strdup( yytext );
249 true { yylval.pdfObj = new PDF_Boolean( true ); return T_Constant; }
250 false { yylval.pdfObj = new PDF_Boolean( false ); return T_Constant; }
251 null { yylval.pdfObj = new PDF_Null( ); return T_Constant; }
252 endobj { return T_endobj; }
253 stream("\r\n"|"\n") { return T_stream; }
254 endstream { return T_endstream; }
256 "<<" { return T_OpenDic; }
257 ">>" { return T_CloseDic; }
264 /* Note that we don't parse the internal meaning of #-sequences within the name, since this
265 * would only require more work when writing back to a pdf file.
268 const char * src( yytext + 1 );
269 RefCountPtr< char > dstMem( new char[ pdflength + 1 ] );
270 char * dst( dstMem.getPtr( ) );
272 for( c = *(src++); ! isblank( c ); c = *(src++) )
280 (dst++) = hexToChar( c1, c2 );
288 yylval.pdfObj = new PDF_Name( dstMem.getPtr( ) );
290 yylval.str = strdup( yytext + 1 );
294 <INITIAL>[ \t\n\r]+ ;
296 . { throw( std::string( "PDF scanner found unrecognized token: " ) + yytext ); }
299 /* The closing %% above marks the end of the Rules section and the beginning
300 * of the User Subroutines section. All text from here to the end of the
301 * file is copied verbatim to the end of the generated lex.pdf.c file.
302 * This section is where you put definitions of helper functions.
// Combines two hexadecimal digit characters into a single byte value:
// c1 contributes the high nibble (its digit value scaled by 16) and c2 the
// low nibble.  The result is accumulated in res.
// NOTE(review): only part of this function is visible in this view; the tests
// that select between the branches below are not shown -- presumably range
// checks on '0'..'9', 'A'..'F' and 'a'..'f'; confirm against the full file.
// NOTE(review): a caller in this file invokes hexToChar( c1, 0 ) with the
// integer 0 (NUL), not the character '0', as c2 -- confirm that no c2 branch
// is taken for that value, so that the low nibble stays zero.
306 unsigned char hexToChar( char c1, char c2 )
// Accumulator for the decoded byte, starts at zero.
308 unsigned char res( 0 );
// High nibble, c1 taken as a decimal digit character.
311 res += 16 * static_cast< unsigned char >( c1 - '0' );
// High nibble, c1 taken as an upper-case hex letter ('A' maps to 10).
315 res += 16 * static_cast< unsigned char >( c1 - 'A' + 10 );
// High nibble, c1 taken as a lower-case hex letter ('a' maps to 10).
319 res += 16 * static_cast< unsigned char >( c1 - 'a' + 10 );
// Low nibble, c2 taken as a decimal digit character.
324 res += static_cast< unsigned char >( c2 - '0' );
// Low nibble, c2 taken as an upper-case hex letter.
328 res += static_cast< unsigned char >( c2 - 'A' + 10 );
// Low nibble, c2 taken as a lower-case hex letter.
332 res += static_cast< unsigned char >( c2 - 'a' + 10 );
/* Converts up to three octal digit characters into the byte they denote.
 *
 * c1 is the most significant digit (weight 64), c2 the middle digit (weight 8)
 * and c3 the least significant digit (weight 1).
 *
 * Callers that have fewer than three digits pass the integer 0 (NUL) in the
 * unused leading positions rather than the character '0'.  A NUL is therefore
 * treated as a zero digit.  Without this, NUL - '0' wraps around to 208, and a
 * NUL in the middle position would add 8 * 208 = 1664, that is 128 modulo 256,
 * to the result.
 *
 * The sum is reduced modulo 256 on return, so values above octal 377 lose their
 * high-order bits, exactly as before.  Characters that are neither NUL nor
 * octal digits are not validated; they yield the same wrapped value as before.
 */
unsigned char octalToChar( char c1, char c2, char c3 )
{
	/* Unsigned arithmetic keeps the modulo-256 result identical to the previous
	 * unsigned-char based computation for every non-NUL argument.
	 */
	const unsigned int d1( c1 == '\0' ? 0u : static_cast< unsigned int >( c1 - '0' ) );
	const unsigned int d2( c2 == '\0' ? 0u : static_cast< unsigned int >( c2 - '0' ) );
	const unsigned int d3( c3 == '\0' ? 0u : static_cast< unsigned int >( c3 - '0' ) );
	return static_cast< unsigned char >( 64 * d1 + 8 * d2 + d3 );
}