Update procedures
[shapes.git] / source / pdfyylex.ll
blob5034782e28b09762cd1084266387fd93bb5a6c2b
1 /* This file is part of Shapes.
2  *
3  * Shapes is free software: you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License as published by
5  * the Free Software Foundation, either version 3 of the License, or
6  * any later version.
7  *
8  * Shapes is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with Shapes.  If not, see <http://www.gnu.org/licenses/>.
15  *
16  * Copyright 2008, 2010 Henrik Tidefelt
17  */
21 #include "pdfstructure.h"
22 #include "pdfscanner.h"
23 #include "exitcodes.h"
24 #include <string>
25 #include <iostream>
26 #include <iomanip>
27 #include <cstdio> // This is a workaround for a bug in Flex.
29 unsigned char hexToChar( char c1, char c2 );
30 unsigned char octalToChar( char c1, char c2, char c3 );
32 size_t stringParenDepth;
34 #define YY_EXIT_FAILURE Shapes::Interaction::EXIT_INTERNAL_ERROR
36 using namespace SimplePDF;
/* Scanner options: generate a C++ scanner whose yylex belongs to class PdfScanner,
 * use "pdf" instead of the default "yy" name prefix, and do not call yywrap at end of input.
 */
%option c++ noyywrap
%option prefix="pdf" yyclass="PdfScanner"

/* Exclusive start condition that is active between the parentheses of a literal string. */
%x StringState

/* Character classes.  Regular is the complement of WhiteSpace and Delimiter taken together.
 * NOTE(review): the PDF specification also lists NUL and form feed as white-space characters;
 * they are not part of WhiteSpace here -- confirm whether that is intended.
 */
WhiteSpace [ \t\n\r]
Delimiter [\[\]\(\)\{\}\<\>\/\%]
Regular [^ \t\n\r\[\]\(\)\{\}\<\>\/\%]

/* Numbers.  PlainInteger is unsigned, DecInteger takes an optional sign.
 * NOTE(review): Float makes the digits on both sides of the point optional, so a lone "." also matches.
 */
PlainInteger [0-9]+
DecInteger [+-]?[0-9]+
Float [+-]?[0-9]*[.][0-9]*

/* A name is a slash followed by any number of regular characters. */
Name [/]{Regular}*

/* Hex digits and white space enclosed in angle brackets. */
HexString [<]({WhiteSpace}|[0-9A-Fa-f])*[>]

/* A run of string content without bare parentheses; a backslash followed by any character
 * (including a newline or a parenthesis) is consumed as a unit.
 */
ButParentheses ([^()]|(\\(.|\n)))*

/* Two unsigned integers followed by the keyword R (reference) or obj (definition). */
IndirectRef {PlainInteger}{WhiteSpace}+{PlainInteger}{WhiteSpace}*"R"
IndirectDef {PlainInteger}{WhiteSpace}+{PlainInteger}{WhiteSpace}*"obj"
{DecInteger} {
        /* Signed decimal integer: convert the matched text and hand it over as a PDF_Int constant.
         * No end pointer is requested from strtol, since the pattern already guarantees that
         * yytext consists of an optional sign followed by digits only.
         * NOTE(review): strtol clamps values that do not fit in a long; that case is not detected here.
         */
        yylval.pdfObj = new PDF_Int( strtol( yytext, 0, 10 ) );
        return T_Constant;
}
{IndirectDef} {
        /* The matched text is two unsigned integers followed by the keyword obj.
         * The first strtol call leaves the cursor just after the first integer, so the
         * second call continues from there (strtol skips the separating white space).
         * Presumably the two numbers are the object number and the generation number -- confirm
         * against PDF_Indirect.
         */
        char * cursor = 0;
        const long firstNumber = strtol( yytext, & cursor, 10 );
        const long secondNumber = strtol( cursor, & cursor, 10 );
        yylval.pdfObj = new PDF_Indirect( firstNumber, secondNumber );
        return T_obj;
}
{IndirectRef} {
        /* The matched text is two unsigned integers followed by the keyword R.
         * The first strtol call leaves the cursor just after the first integer, so the
         * second call continues from there (strtol skips the separating white space).
         * Presumably the two numbers are the object number and the generation number -- confirm
         * against PDF_Indirect_in.
         */
        char * cursor = 0;
        const long firstNumber = strtol( yytext, & cursor, 10 );
        const long secondNumber = strtol( cursor, & cursor, 10 );
        yylval.pdfR = new PDF_Indirect_in( firstNumber, secondNumber );
        return T_R;
}
{Float} {
        /* Real number with a decimal point: convert the matched text and hand it over as a
         * PDF_Float constant.  No end pointer is requested from strtod since it was never used.
         * NOTE(review): strtod honours the current C locale's decimal point character; this assumes
         * the program keeps the numeric locale at "C" -- confirm.
         */
        yylval.pdfObj = new PDF_Float( strtod( yytext, 0 ) );
        return T_Constant;
}
{HexString} {
        /* The text between the angle brackets is stored as it stands in a PDF_HexString.
         * The hex digits are deliberately not decoded, since decoding would only require
         * more work when writing back to a pdf file.
         *
         * The decoding variant below is disabled and kept for reference only.  Note that it
         * skips blanks with isblank although the pattern also admits newlines and carriage
         * returns, and that it passes 0 to hexToChar for a missing final digit.
         */
        /*
        const char * src( yytext + 1 );
        char c1;
        char c2;
        std::string str;
        for( ; isblank( *src ); ++src )
                ;
        c1 = *src;
        ++src;
        while( c1 != '>' )
                {
                        for( ; isblank( *src ); ++src )
                                ;
                        c2 = *src;
                        ++src;
                        if( c2 == '>' )
                                {
                                        str += hexToChar( c1, 0 );
                                        break;
                                }
                        str += hexToChar( c1, c2 );
                        for( ; isblank( *src ); ++src )
                                ;
                        c1 = *src;
                        ++src;
                }
        yylval.pdfObj = new PDF_String( str );
        */

        /* Overwrite the closing '>' with a terminator, then skip the opening '<'. */
        char * const closingBracket = yytext + yyleng - 1;
        *closingBracket = '\0';
        yylval.pdfObj = new PDF_HexString( yytext + 1 );
        return T_Constant;
}
<INITIAL>[\(] {
        /* Opening parenthesis of a literal string: enter StringState at nesting depth one
         * and pass the parenthesis itself on as a single-character token.
         */
        BEGIN( StringState );
        stringParenDepth = 1;
        return *yytext;
}
<StringState>[\(] {
        /* A parenthesis opened inside a string is string content: deliver it as a
         * T_String piece and remember that one more ')' is needed to end the string.
         */
        yylval.str = strdup( yytext );
        ++stringParenDepth;
        return T_String;
}
<StringState>[\)] {
        /* When the depth drops to zero this ')' terminates the string: go back to INITIAL
         * and pass the parenthesis on as a single-character token.
         */
        if( --stringParenDepth == 0 )
                {
                        BEGIN( INITIAL );
                        return yytext[ 0 ];
                }

        /* Otherwise it balances a nested '(' and is delivered as string content. */
        yylval.str = strdup( yytext );
        return T_String;
}
<StringState>{ButParentheses} {
        /* A run of string content without bare parentheses is delivered as a T_String piece,
         * copied exactly as it appears in the input.  Escape sequences within the string are
         * deliberately not interpreted, since that would only require more work when writing
         * back to a pdf file.
         *
         * The decoding variant below is disabled and kept for reference only.  As written it is
         * not complete: it calls getPtr on a plain char pointer, reads from an isPtr that is not
         * declared here, and nothing ever leaves the while( true ) loop to reach the done label.
         * NOTE(review): strdup stops at the first NUL byte of yytext -- confirm that string
         * content containing NUL bytes does not need to be supported.
         */
        /*
        char * res( new char[ pdfleng + 1 ] );
        char * dst( res.getPtr( ) );
        const char * src( yytext );
        char c;
        while( true )
                {
                        c = *(src++);
                        if( c == '\\' )
                                {
                                        char c1( *(src++) );
                                        switch( c1 )
                                                {
                                                case 'n':
                                                        *(dst++) = '\n';
                                                        break;
                                                case 'r':
                                                        *(dst++) = '\r';
                                                        break;
                                                case 't':
                                                        *(dst++) = '\t';
                                                        break;
                                                case 'b':
                                                        *(dst++) = '\b';
                                                        break;
                                                case 'f':
                                                        *(dst++) = '\f';
                                                        break;
                                                case '(':
                                                        *(dst++) = '(';
                                                        break;
                                                case ')':
                                                        *(dst++) = ')';
                                                        break;
                                                case '\\':
                                                        *(dst++) = '\\';
                                                        break;
                                                default:
                                                        if( isdigit( c1 ) )
                                                                {
                                                                        char c2;
                                                                        isPtr->get( c2 );
                                                                        if( isdigit( c2 ) )
                                                                                {
                                                                                        char c3;
                                                                                        isPtr->get( c3 );
                                                                                        if( isdigit( c3 ) )
                                                                                                {
                                                                                                        *(dst++) = octalToChar( c1, c2, c3 );
                                                                                                }
                                                                                        else
                                                                                                {
                                                                                                        *(dst++) = octalToChar( 0, c1, c2 );
                                                                                                        *(dst++) = c3;
                                                                                                }
                                                                                }
                                                                        else
                                                                                {
                                                                                        *(dst++) = octalToChar( 0, 0, c1 );
                                                                                        *(dst++) = c2;
                                                                                }
                                                                }
                                                        else
                                                                {
                                                                        *(dst++) = c1;
                                                                }
                                                        break;
                                                }
                                }
                        else
                                {
                                        *(dst++) = c;
                                }
                }
 done:
        *dst = '\0';
        yylval.str = res;
        */

        char * const rawCopy = strdup( yytext );
        yylval.str = rawCopy;
        return T_String;
}
true {
        /* Boolean literal. */
        yylval.pdfObj = new PDF_Boolean( true );
        return T_Constant;
}
false {
        /* Boolean literal. */
        yylval.pdfObj = new PDF_Boolean( false );
        return T_Constant;
}
null {
        /* The null object. */
        yylval.pdfObj = new PDF_Null( );
        return T_Constant;
}
endobj {
        return T_endobj;
}
stream("\r\n"|"\n") {
        /* The keyword only matches together with the line break (CR LF, or LF alone) that follows it. */
        return T_stream;
}
endstream {
        return T_endstream;
}
"<<" {
        /* Dictionary opening delimiter. */
        return T_OpenDic;
}
">>" {
        /* Dictionary closing delimiter. */
        return T_CloseDic;
}
[\[\]] {
        /* Square brackets are passed on as single-character tokens. */
        return *yytext;
}
{Name} {
        /* A name is delivered as a T_Name token carrying a copy of the matched text without the
         * leading slash.  #-sequences within the name are deliberately not interpreted, since
         * that would only require more work when writing back to a pdf file.
         *
         * The decoding variant below is disabled and kept for reference only.  As written it is
         * not complete: it assigns to (dst++) without dereferencing, and refers to pdflength.
         */
        /*
        const char * src( yytext + 1 );
        RefCountPtr< char > dstMem( new char[ pdflength + 1 ] );
        char * dst( dstMem.getPtr( ) );
        char c;
        for( c = *(src++); ! isblank( c ); c = *(src++) )
                {
                        if( c == '#' )
                                {
                                        char c1;
                                        char c2;
                                        c1 = *(src++);
                                        c2 = *(src++);
                                        (dst++) = hexToChar( c1, c2 );
                                }
                        else
                                {
                                        (dst++) = c;
                                }
                }
        *dst = '\0';
        yylval.pdfObj = new PDF_Name( dstMem.getPtr( ) );
        */

        char * const nameCopy = strdup( yytext + 1 );
        yylval.str = nameCopy;
        return T_Name;
}
<INITIAL>[ \t\n\r]+ {
        /* White space between tokens is skipped. */
}
. {
        /* Any other single character is not part of a known token: report it by throwing
         * a std::string that holds the message followed by the offending text.
         */
        std::string message( "PDF scanner found unrecognized token: " );
        message += yytext;
        throw message;
}
299 /* The closing %% above marks the end of the Rules section and the beginning
300  * of the User Subroutines section. All text from here to the end of the
301  * file is copied verbatim to the end of the generated lex.pdf.c file.
302  * This section is where you put definitions of helper functions.
303  */
/* Value of a single hexadecimal digit.
 * Accepts '0'-'9', 'A'-'F' and 'a'-'f'.  Every other character, including the NUL
 * character that the disabled hex string decoder passes for a missing final digit,
 * counts as zero instead of wrapping around to an unrelated value.
 */
static unsigned char hexDigitValue( char c )
{
        if( '0' <= c && c <= '9' )
                {
                        return static_cast< unsigned char >( c - '0' );
                }
        if( 'A' <= c && c <= 'F' )
                {
                        return static_cast< unsigned char >( c - 'A' + 10 );
                }
        if( 'a' <= c && c <= 'f' )
                {
                        return static_cast< unsigned char >( c - 'a' + 10 );
                }
        return 0;
}

/* Combines two hexadecimal digits into one byte.
 *
 * c1 is the high-order digit and c2 the low-order digit; upper and lower case letters
 * are both accepted.  A digit that is absent (NUL) or not a hexadecimal digit at all
 * contributes zero, so hexToChar( '9', 0 ) yields 0x90.
 *
 * Returns the byte 16 * value( c1 ) + value( c2 ).
 */
unsigned char hexToChar( char c1, char c2 )
{
        return static_cast< unsigned char >( 16 * hexDigitValue( c1 ) + hexDigitValue( c2 ) );
}
/* Value of a single octal digit character.
 * The NUL character stands for an absent digit and counts as zero; the disabled string
 * decoder passes 0 for the leading digits of short escape sequences.  Any other
 * character is converted by its distance from '0', as before.
 */
static unsigned char octalDigitValue( char c )
{
        if( c == '\0' )
                {
                        return 0;
                }
        return static_cast< unsigned char >( c - '0' );
}

/* Combines three octal digits into one byte.
 *
 * c1 is the most significant digit and c3 the least significant one.  Absent leading
 * digits may be given as NUL, so octalToChar( 0, 0, '7' ) yields 7.
 *
 * Returns 64 * value( c1 ) + 8 * value( c2 ) + value( c3 ), reduced to the range of
 * unsigned char; a three-digit value above 0377 therefore loses its high-order part.
 */
unsigned char octalToChar( char c1, char c2, char c3 )
{
        const int combined = 64 * octalDigitValue( c1 ) + 8 * octalDigitValue( c2 ) + octalDigitValue( c3 );
        return static_cast< unsigned char >( combined );
}