Updating the changelog in the VERSION file, and version_sync.
[shapes.git] / source / pdfyylex.ll
blob b9cb3ba105db59a61df4b06491618f0c7546c774
1 /* This file is part of Shapes.
2  *
3  * Shapes is free software: you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License as published by
5  * the Free Software Foundation, either version 3 of the License, or
6  * any later version.
7  *
8  * Shapes is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with Shapes.  If not, see <http://www.gnu.org/licenses/>.
15  *
16  * Copyright 2008 Henrik Tidefelt
17  */
21 #include "pdfstructure.h"
22 #include "pdfscanner.h"
23 #include "exitcodes.h"
24 #include <string>
25 #include <iostream>
26 #include <iomanip>
/* Converters from textual hexadecimal / octal digits to a single byte value.
 * Both are defined in the user-code section at the end of this file.
 */
28 unsigned char hexToChar( char c1, char c2 );
29 unsigned char octalToChar( char c1, char c2, char c3 );
/* Parenthesis nesting depth inside the literal string currently being scanned.
 * It is set to 1 by the rule for the opening parenthesis and then adjusted by
 * the StringState rules for nested parentheses.
 */
31 size_t stringParenDepth;
/* Replaces flex's YY_EXIT_FAILURE with the project's internal-error exit code. */
33 #define YY_EXIT_FAILURE Shapes::Interaction::EXIT_INTERNAL_ERROR
35 using namespace SimplePDF;
/* Character classes.  Regular is exactly the complement of WhiteSpace and
 * Delimiter taken together.
 */
39 WhiteSpace [ \t\n\r]
40 Delimiter [\[\]\(\)\{\}\<\>\/\%]
41 Regular [^ \t\n\r\[\]\(\)\{\}\<\>\/\%]
/* PlainInteger is unsigned; DecInteger accepts an optional leading sign. */
43 PlainInteger [0-9]+
44 DecInteger [+-]?[0-9]+
/* A decimal point is mandatory, but the digits on either side are optional,
 * so a lone "." also matches.
 */
46 Float [+-]?[0-9]*[.][0-9]*
/* A slash followed by zero or more regular characters. */
48 Name [/]{Regular}*
/* Angle brackets enclosing any mix of hexadecimal digits and whitespace. */
50 HexString [<]({WhiteSpace}|[0-9A-Fa-f])*[>]
/* A (possibly empty) run of string content: any character that is not a
 * parenthesis, or a backslash followed by any character including newline.
 */
51 ButParentheses ([^()]|(\\(.|\n)))*
/* Two unsigned integers separated by whitespace, followed by "R" (reference)
 * or "obj" (definition).
 */
53 IndirectRef {PlainInteger}{WhiteSpace}+{PlainInteger}{WhiteSpace}*"R"
54 IndirectDef {PlainInteger}{WhiteSpace}+{PlainInteger}{WhiteSpace}*"obj"
/* Generate a C++ scanner whose symbols use the prefix "pdf" and whose scanning
 * method belongs to the class PdfScanner; no yywrap is used.
 */
56 %option c++
57 %option noyywrap
59 %option prefix="pdf"
60 %option yyclass="PdfScanner"
/* Exclusive start condition that is active while inside a literal string. */
62 %x StringState
66 {DecInteger} {
        /* Optionally signed integer literal: converted in base 10 with strtol
         * and wrapped in a PDF_Int.  The end pointer is passed to strtol but
         * never inspected, so no range check is made here.
         */
67         char * end;
68         yylval.pdfObj = new PDF_Int( strtol( yytext, & end, 10 ) );
69         return T_Constant;
72 {IndirectDef} {
        /* Header of an indirect object definition: "<int> <int> obj".  The two
         * integers are read back to back; the second strtol resumes where the
         * first one stopped.  They are handed to PDF_Indirect in that order
         * (presumably object number, then generation number -- confirm against
         * pdfstructure.h).
         */
73         char * end;
74         long i;
75         long v;
76         i = strtol( yytext, & end, 10 );
77         v = strtol( end, & end, 10 );
78         yylval.pdfObj = new PDF_Indirect( i, v );
79         return T_obj;
82 {IndirectRef} {
        /* Indirect reference: "<int> <int> R".  Parsed exactly like the
         * definition header, but the result is a PDF_Indirect_in stored in
         * yylval.pdfR and the token returned is T_R.
         */
83         char * end;
84         long i;
85         long v;
86         i = strtol( yytext, & end, 10 );
87         v = strtol( end, & end, 10 );
88         yylval.pdfR = new PDF_Indirect_in( i, v );
89         return T_R;
92 {Float} {
        /* Real number literal: converted with strtod and wrapped in a
         * PDF_Float.  The end pointer is not inspected afterwards.
         */
93         char * end;
94         yylval.pdfObj = new PDF_Float( strtod( yytext, & end ) );
95         return T_Constant;
98 {HexString} {
99         /* Note that we don't parse the internal meaning of the bracketed contents, since this
100          * would only require more work when writing back to a pdf file.
101          */
        /* The disabled decoder below is kept for reference only.  It skips
         * blanks between digits and pads an odd trailing digit by calling
         * hexToChar( c1, 0 ).
         */
102         /*
103         const char * src( yytext + 1 );
104         char c1;
105         char c2;
106         std::string str;
108         for( ; isblank( *src ); ++src )
109                 ;
110         c1 = *src;
111         ++src;
112         while( c1 != '>' )
113                 {
114                         for( ; isblank( *src ); ++src )
115                                 ;
116                         c2 = *src;
117                         ++src;
118                         if( c2 == '>' )
119                                 {
120                                         str += hexToChar( c1, 0 );
121                                         break;
122                                 }
123                         str += hexToChar( c1, c2 );
124                         for( ; isblank( *src ); ++src )
125                                 ;
126                         c1 = *src;
127                         ++src;
128                 }
130         yylval.pdfObj = new PDF_String( str );
131         */
        /* Overwrite the closing '>' with a terminator and skip the opening '<',
         * so that PDF_HexString receives only the raw text between the angle
         * brackets (digits and any embedded whitespace, undecoded).
         */
132         yytext[ yyleng - 1 ] = '\0';
133         yylval.pdfObj = new PDF_HexString( yytext + 1 );
134         return T_Constant;
137 <INITIAL>[\(] {
        /* Start of a literal string: reset the nesting depth to one, switch to
         * the StringState start condition, and return the parenthesis itself
         * as a single-character token.
         */
138         stringParenDepth = 1;
139         BEGIN( StringState );
140         return yytext[0];
142 <StringState>[\(] {
        /* A nested opening parenthesis is part of the string content: count it
         * and return it as a T_String fragment.  The text is duplicated with
         * strdup, so the receiver of yylval.str presumably has to free it --
         * confirm in the parser.
         */
143         ++stringParenDepth;
144         yylval.str = strdup( yytext );
145         return T_String;
147 <StringState>[\)] {
        /* A closing parenthesis either balances a nested one, in which case it
         * is string content and is returned as a T_String fragment, or it ends
         * the string, in which case the scanner returns to INITIAL and the
         * parenthesis is returned as a single-character token.
         */
148         --stringParenDepth;
149         if( stringParenDepth > 0 )
150                 {
151                         yylval.str = strdup( yytext );
152                         return T_String;
153                 }
154         else
155                 {
156                         BEGIN( INITIAL );
157                         return yytext[ 0 ];
158                 }
161 <StringState>{ButParentheses} {
162         /* Note that we don't parse the internal meaning of escape sequences within the name, since this
163          * would only require more work when writing back to a pdf file.
164          */
        /* The disabled decoder below is kept for reference only and is not in
         * a compilable state: the loop has no exit, the "done" label is never
         * jumped to, res is a plain char pointer yet is used with getPtr( ),
         * and the octal branch reads from isPtr instead of from src.  It also
         * passes 0 to octalToChar for absent leading digits.
         */
165         /*
166         char * res( new char[ pdfleng + 1 ] );
167         char * dst( res.getPtr( ) );
168         const char * src( yytext );
169         char c;
170         while( true )
171                 {
172                         c = *(src++);
173                         if( c == '\\' )
174                                 {
175                                         char c1( *(src++) );
176                                         switch( c1 )
177                                                 {
178                                                 case 'n':
179                                                         *(dst++) = '\n';
180                                                         break;
181                                                 case 'r':
182                                                         *(dst++) = '\r';
183                                                         break;
184                                                 case 't':
185                                                         *(dst++) = '\t';
186                                                         break;
187                                                 case 'b':
188                                                         *(dst++) = '\b';
189                                                         break;
190                                                 case 'f':
191                                                         *(dst++) = '\f';
192                                                         break;
193                                                 case '(':
194                                                         *(dst++) = '(';
195                                                         break;
196                                                 case ')':
197                                                         *(dst++) = ')';
198                                                         break;
199                                                 case '\\':
200                                                         *(dst++) = '\\';
201                                                         break;
202                                                 default:
203                                                         if( isdigit( c1 ) )
204                                                                 {
205                                                                         char c2;
206                                                                         isPtr->get( c2 );
207                                                                         if( isdigit( c2 ) )
208                                                                                 {
209                                                                                         char c3;
210                                                                                         isPtr->get( c3 );
211                                                                                         if( isdigit( c3 ) )
212                                                                                                 {
213                                                                                                         *(dst++) = octalToChar( c1, c2, c3 );
214                                                                                                 }
215                                                                                         else
216                                                                                                 {
217                                                                                                         *(dst++) = octalToChar( 0, c1, c2 );
218                                                                                                         *(dst++) = c3;
219                                                                                                 }
220                                                                                 }
221                                                                         else
222                                                                                 {
223                                                                                         *(dst++) = octalToChar( 0, 0, c1 );
224                                                                                         *(dst++) = c2;
225                                                                                 }
226                                                                 }
227                                                         else
228                                                                 {
229                                                                         *(dst++) = c1;
230                                                                 }
231                                                         break;
232                                                 }
233                                 }
234                         else
235                                 {
236                                         *(dst++) = c;
237                                 }
238                 }
240  done:
241         *dst = '\0';
242         yylval.str = res;
243         */
        /* Hand the matched run of string content over verbatim, escape
         * sequences included, as a strdup'ed T_String fragment.
         */
244         yylval.str = strdup( yytext );
245         return T_String;
        /* Keyword constants: each allocates the corresponding PDF object and
         * is reported as T_Constant.
         */
248 true { yylval.pdfObj = new PDF_Boolean( true ); return T_Constant; }
249 false { yylval.pdfObj = new PDF_Boolean( false ); return T_Constant; }
250 null { yylval.pdfObj = new PDF_Null( ); return T_Constant; }
        /* Structural keywords carry no value.  Note that "stream" only matches
         * when it is immediately followed by CR LF or by a lone LF, and that
         * the end-of-line marker is consumed as part of the token.
         */
251 endobj { return T_endobj; }
252 stream("\r\n"|"\n") { return T_stream; }
253 endstream { return T_endstream; }
        /* Dictionary delimiters. */
255 "<<" { return T_OpenDic; }
256 ">>" { return T_CloseDic; }
258 [\[\]] {
        /* Square brackets are returned as single-character tokens. */
259         return yytext[0];
262 {Name} {
263         /* Note that we don't parse the internal meaning of #-sequences within the name, since this
264          * would only require more work when writing back to a pdf file.
265          */
        /* The disabled decoder below is kept for reference only and would not
         * compile as written: the assignments through (dst++) lack a
         * dereference.
         */
266         /*
267         const char * src( yytext + 1 );
268         RefCountPtr< char > dstMem( new char[ pdflength + 1 ] );
269         char * dst( dstMem.getPtr( ) );
270         char c;
271         for( c = *(src++); ! isblank( c ); c = *(src++) )
272                 {
273                         if( c == '#' )
274                                 {
275                                         char c1;
276                                         char c2;
277                                         c1 = *(src++);
278                                         c2 = *(src++);
279                                         (dst++) = hexToChar( c1, c2 );
280                                 }
281                         else
282                                 {
283                                         (dst++) = c;
284                                 }
285                 }
286         *dst = '\0';
287         yylval.pdfObj = new PDF_Name( dstMem.getPtr( ) );
288         */
        /* Skip the leading slash and return the rest of the name verbatim as a
         * strdup'ed string with token T_Name.
         */
289         yylval.str = strdup( yytext + 1 );
290         return T_Name;
        /* Whitespace between tokens is discarded. */
293 <INITIAL>[ \t\n\r]+ ;
        /* Fallback for any single character that no other rule accepts.  The
         * exception thrown is a std::string carrying the message followed by
         * the offending text.
         */
295 . { throw( std::string( "PDF scanner found unrecognized token: " ) + yytext ); }
298 /* The closing %% above marks the end of the Rules section and the beginning
299  * of the User Subroutines section. All text from here to the end of the
300  * file is copied verbatim to the end of the generated lex.pdf.cc file.
301  * This section is where you put definitions of helper functions.
302  */
/* Returns the numeric value (0-15) of the hexadecimal digit c.  Upper and
 * lower case letters are both accepted.  Every other character, including
 * NUL, counts as zero instead of producing an arbitrary value.
 */
static unsigned char hexDigitValue( char c )
{
        if( c >= '0' && c <= '9' )
                {
                        return static_cast< unsigned char >( c - '0' );
                }
        if( c >= 'A' && c <= 'F' )
                {
                        return static_cast< unsigned char >( c - 'A' + 10 );
                }
        if( c >= 'a' && c <= 'f' )
                {
                        return static_cast< unsigned char >( c - 'a' + 10 );
                }
        return 0;
}

/* Combines two hexadecimal digit characters into the byte they denote.
 *
 *   c1  the high-order digit
 *   c2  the low-order digit
 *
 * Returns 16 * value( c1 ) + value( c2 ).  A character that is not a
 * hexadecimal digit contributes zero, so a missing final digit may be passed
 * as 0 (NUL) and is then treated as the digit '0'.
 */
unsigned char hexToChar( char c1, char c2 )
{
        return static_cast< unsigned char >( 16 * hexDigitValue( c1 ) + hexDigitValue( c2 ) );
}
/* Returns the numeric value of the digit character c.  Any character that is
 * not a decimal digit, including NUL, counts as zero.  The digits '8' and '9'
 * keep their decimal value, as they always have in this function.
 */
static unsigned int octalDigitValue( char c )
{
        if( c >= '0' && c <= '9' )
                {
                        return static_cast< unsigned int >( c - '0' );
                }
        return 0;
}

/* Combines up to three octal digit characters into the byte they denote.
 *
 *   c1  the most significant digit
 *   c2  the middle digit
 *   c3  the least significant digit
 *
 * Returns 64 * value( c1 ) + 8 * value( c2 ) + value( c3 ), reduced to eight
 * bits.  Absent leading digits may be passed as 0 (NUL); they contribute
 * zero, so octalToChar( 0, 0, '7' ) is 7.
 */
unsigned char octalToChar( char c1, char c2, char c3 )
{
        const unsigned int value = 64 * octalDigitValue( c1 ) + 8 * octalDigitValue( c2 ) + octalDigitValue( c3 );
        /* Values above 0377 do not fit in a byte; only the low eight bits are kept. */
        return static_cast< unsigned char >( value & 0xFF );
}