1 /* This file is part of Shapes.
3 * Shapes is free software: you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation, either version 3 of the License, or
8 * Shapes is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with Shapes. If not, see <http://www.gnu.org/licenses/>.
16 * Copyright 2008, 2010 Henrik Tidefelt
22 * Lex input file to generate the scanner for the scanning of length data stored in a page's content stream.
27 /* The text within this first region delimited by %{ and %} is assumed to
28 * be C/C++ code and will be copied verbatim to the lex.pdf.c file ahead
29 * of the definitions of the pdflex() function. Add other header file inclusions
30 * or C++ variable declarations/prototypes that are needed by your code here.
33 #include "pdfstructure.h"
34 #include "pdfscanner.h"
35 #include "pdfparser.tab.h"
40 #include <cstdio> // This is a workaround for a bug in Flex.
// Helpers defined in the user subroutines section at the end of this file.
// hexToChar combines two hexadecimal digit characters into one byte.
42 unsigned char hexToChar( char c1, char c2 );
// octalToChar combines three octal digit characters into one byte.
43 unsigned char octalToChar( char c1, char c2, char c3 );
// Set to 1 right before the scanner switches to StringState, and compared against 0 by a later action.
// Presumably the current nesting depth of parentheses inside a literal string -- TODO confirm.
45 size_t stringParenDepth;
48 * Global variable: pdflval
49 * -----------------------
50 * This global variable is how we get attribute information about the token
51 * just scanned to the client. The scanner sets the global variable
52 * appropriately and since it's global the client can just read it. In the
53 * future, this variable will be declared for us in the y.tab.c file
54 * produced by Yacc, but for now, we declare it manually.
56 //YYSTYPE pdflval; // manually declared for pp1, later Yacc provides
59 * Global variable: pdflloc
60 * -----------------------
61 * This global variable is how we get position information about the token
62 * just scanned to the client. (Operates similarly to pdflval above)
64 //struct pdfltype pdflloc; // manually declared for pp1, later Yacc provides
69 * The section before the first %% is the Definitions section of the lex
70 * input file. Here is where you set options for the scanner, define lex
71 * states, and can set up definitions to give names to regular expressions
72 * as a simple substitution mechanism that allows for more readable
73 * entries in the Rules section later.
/* Any single character other than space, tab, newline or carriage return. */
77 NonWhiteSpace [^ \t\n\r]
/* An optional sign followed by one or more decimal digits. */
80 DecInteger [+-]?[0-9]+
/* An optional sign and a decimal point; the digit runs on either side of the point may both be empty. */
82 Float [+-]?[0-9]*[.][0-9]*
/* A slash followed by zero or more non-whitespace characters. */
84 Name [/]{NonWhiteSpace}*
/* Angle brackets enclosing any mix of hexadecimal digits and WhiteSpace (a definition this pattern relies on). */
86 HexString [<]({WhiteSpace}|[0-9A-Fa-f])*[>]
/* A possibly empty run of characters that are not parentheses; a backslash escapes whatever character follows it, including a parenthesis or a newline. */
87 ButParentheses ([^()]|(\\(.|\n)))*
// The matched text is parsed as a base-10 integer with strtol and stored in pdflval.plainInt.
98 pdflval.plainInt = strtol( yytext, & end, 10 );
// The matched text is parsed with strtod and wrapped in a newly allocated PDF_Float.
104 pdflval.pdfObj = new PDF_Float( strtod( yytext, & end ) );
109 /* Note that we don't parse the internal meaning of the bracketed contents, since this
110 * would only require more work when writing back to a pdf file.
// Decoding path: src starts just past the first character of the matched text.
113 const char * src( yytext + 1 );
// isblank() recognises only space and tab.
// NOTE(review): if WhiteSpace in the HexString pattern also admits line breaks, these skips will not step over them -- confirm.
118 for( ; isblank( *src ); ++src )
124 for( ; isblank( *src ); ++src )
// Only one digit available: 0 is passed in place of the second digit (presumably a trailing unpaired digit -- confirm).
130 str += hexToChar( c1, 0 );
// A pair of digits becomes one byte appended to str.
133 str += hexToChar( c1, c2 );
134 for( ; isblank( *src ); ++src )
// Two alternative results appear below: a PDF_String built from the decoded bytes ...
140 pdflval.pdfObj = new PDF_String( str );
// ... or a PDF_HexString built from the matched text unchanged.
// NOTE(review): confirm which of the two alternatives is the active one.
142 pdflval.pdfObj = new PDF_HexString( yytext );
// Entering a literal string: the depth counter starts at 1 and the scanner switches to StringState.
147 stringParenDepth = 1;
148 BEGIN( StringState );
// The matched text is handed on as a strdup'ed copy in pdflval.str.
// NOTE(review): the copy is heap-allocated by strdup -- confirm that the consumer of pdflval.str frees it.
153 pdflval.str = strdup( yytext );
// The depth counter is tested; presumably the copy below is made only while the counter is still positive -- confirm.
158 if( stringParenDepth > 0 )
160 pdflval.str = strdup( yytext );
170 <StringState>{ButParentheses} {
171 /* Note that we don't parse the internal meaning of escape sequences within the string, since this
172 * would only require more work when writing back to a pdf file.
// Decoding path: a buffer one byte longer than the match receives the unescaped characters.
// NOTE(review): res is declared as a plain char *, yet the next line calls res.getPtr( ); the
// corresponding buffer in the name action is a RefCountPtr< char > -- this would not compile as written.
175 char * res( new char[ pdfleng + 1 ] );
176 char * dst( res.getPtr( ) );
177 const char * src( yytext );
// Octal escapes with three, two or one digit(s); missing leading digits are passed as a literal 0
// rather than as the character '0'.
222 *(dst++) = octalToChar( c1, c2, c3 );
226 *(dst++) = octalToChar( 0, c1, c2 );
232 *(dst++) = octalToChar( 0, 0, c1 );
// Alternative result: the matched text is passed on unchanged as a strdup'ed copy.
253 pdflval.str = strdup( yytext );
// Keyword rules: true, false and null each allocate the corresponding constant object and return T_Constant.
257 true { pdflval.pdfObj = new PDF_Boolean( true ); return T_Constant; }
258 false { pdflval.pdfObj = new PDF_Boolean( false ); return T_Constant; }
259 null { pdflval.pdfObj = new PDF_Null( ); return T_Constant; }
// The structural keywords below only return their token; they set nothing in pdflval.
260 obj { return T_obj; }
261 endobj { return T_endobj; }
// The stream keyword is matched together with the line break (CR LF or LF) that follows it.
262 stream("\r\n"|"\n") { return T_stream; }
263 endstream { return T_endstream; }
// Dictionary delimiters.
266 "<<" { return T_OpenDic; }
267 ">>" { return T_CloseDic; }
274 /* Note that we don't parse the internal meaning of #-sequences within the name, since this
275 * would only require more work when writing back to a pdf file.
// Decoding path: src starts just past the first character of the matched text.
278 const char * src( yytext + 1 );
// NOTE(review): the length variable is spelled pdflength here but pdfleng in the string action -- confirm which one exists.
279 RefCountPtr< char > dstMem( new char[ pdflength + 1 ] );
280 char * dst( dstMem.getPtr( ) );
// NOTE(review): the loop condition stops only at a space or tab. Assuming this action belongs to the Name
// pattern, the match cannot contain either, so the condition alone never ends the loop -- confirm how it terminates.
282 for( c = *(src++); ! isblank( c ); c = *(src++) )
// NOTE(review): this assigns to the value of (dst++) rather than through the pointer; *(dst++), as used for
// the octal escapes in the string action, looks intended.
290 (dst++) = hexToChar( c1, c2 );
// Two alternative results appear below: a PDF_Name built from the decoded buffer ...
298 pdflval.pdfObj = new PDF_Name( dstMem.getPtr( ) );
// ... or a PDF_Name built from the matched text without its first character.
300 pdflval.pdfObj = new PDF_Name( yytext + 1 );
// Runs of spaces, tabs and line breaks are discarded in the INITIAL state.
304 <INITIAL>[ \t\n\r]+ ;
// Any other single character is reported as an error by throwing a string literal (a const char *).
306 . { throw( "Page scanner found unrecognized token" ); }
309 /* The closing %% above marks the end of the Rules section and the beginning
310 * of the User Subroutines section. All text from here to the end of the
311 * file is copied verbatim to the end of the generated lex.pdf.c file.
312 * This section is where you put definitions of helper functions.
/* Returns the numeric value (0-15) of the hexadecimal digit c.  Both upper and
 * lower case letters are accepted.  Any other character, including the 0 that
 * callers pass in place of a missing digit, counts as zero.
 */
static unsigned char hexDigitValue( char c )
{
  if( '0' <= c && c <= '9' )
    {
      return static_cast< unsigned char >( c - '0' );
    }
  if( 'A' <= c && c <= 'F' )
    {
      return static_cast< unsigned char >( c - 'A' + 10 );
    }
  if( 'a' <= c && c <= 'f' )
    {
      return static_cast< unsigned char >( c - 'a' + 10 );
    }
  return 0;
}

/* Combines two hexadecimal digit characters into the byte they encode.
 *
 * c1 is the high-order digit (weight 16) and c2 the low-order digit (weight 1).
 * A character that is not a hexadecimal digit contributes nothing, so
 * hexToChar( c1, 0 ) yields the value of a lone high-order digit.
 */
unsigned char hexToChar( char c1, char c2 )
{
  return static_cast< unsigned char >( 16 * hexDigitValue( c1 ) + hexDigitValue( c2 ) );
}
/* Returns the numeric value of the octal digit character c.
 *
 * Callers pass a literal 0 (NUL) for a digit that is absent from a short
 * escape sequence; that is treated as the digit zero.  Subtracting '0' from
 * NUL would instead leave a large value that corrupts the result.
 */
static unsigned int octalDigitValue( char c )
{
  if( c == '\0' )
    {
      return 0;
    }
  return static_cast< unsigned char >( c - '0' );
}

/* Combines up to three octal digit characters into the byte they encode.
 *
 * c1, c2 and c3 carry the weights 64, 8 and 1 respectively.  A missing leading
 * digit may be given either as the character '0' or as a literal 0.  The sum
 * is truncated to one byte, so any overflow beyond 8 bits is dropped.
 */
unsigned char octalToChar( char c1, char c2, char c3 )
{
  return static_cast< unsigned char >( 64 * octalDigitValue( c1 ) + 8 * octalDigitValue( c2 ) + octalDigitValue( c3 ) );
}