1 /* This file is part of Shapes.
3 * Shapes is free software: you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation, either version 3 of the License, or
8 * Shapes is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with Shapes. If not, see <http://www.gnu.org/licenses/>.
16 * Copyright 2008 Henrik Tidefelt
22 * Lex input file to generate the scanner for the scanning of length data stored in a page's content stream.
27 /* The text within this first region delimited by %{ and %} is assumed to
28 * be C/C++ code and will be copied verbatim to the lex.pdf.c file ahead
29 * of the definitions of the pdflex() function. Add other header file inclusions
30 * or C++ variable declarations/prototypes that are needed by your code here.
33 #include "pdfstructure.h"
34 #include "pdfscanner.h"
35 #include "pdfparser.tab.h"
// Combines two hexadecimal digit characters into one byte; the definition is in the user subroutines section at the end of this file.
40 unsigned char hexToChar( char c1, char c2 );
// Combines three octal digit characters (most significant first) into one byte; also defined at the end of this file.
41 unsigned char octalToChar( char c1, char c2, char c3 );
// Counter used by the literal-string rules: it is set to 1 right before the scanner switches to StringState and is tested with `> 0` in a later rule.
// Presumably it tracks the nesting depth of unescaped parentheses -- confirm against the full parenthesis rules.
43 size_t stringParenDepth;
46 * Global variable: pdflval
47 * -----------------------
48 * This global variable is how we get attribute information about the token
49 * just scanned to the client. The scanner sets the global variable
50 * appropriately and since it's global the client can just read it. In the
51 * future, this variable will be declared for us in the y.tab.c file
52 * produced by Yacc, but for now, we declare it manually.
54 //YYSTYPE pdflval; // manually declared for pp1, later Yacc provides
57 * Global variable: pdflloc
58 * -----------------------
59 * This global variable is how we get position information about the token
60 * just scanned to the client. (Operates similarly to pdflval above)
62 //struct pdfltype pdflloc; // manually declared for pp1, later Yacc provides
67 * The section before the first %% is the Definitions section of the lex
68 * input file. Here is where you set options for the scanner, define lex
69 * states, and can set up definitions to give names to regular expressions
70 * as a simple substitution mechanism that allows for more readable
71 * entries in the Rules section later.
/* Any single character other than space, tab, newline or carriage return. */
75 NonWhiteSpace [^ \t\n\r]
/* An optional sign followed by one or more decimal digits. */
78 DecInteger [+-]?[0-9]+
/* An optional sign, then digits on either side of a mandatory '.'.
 * NOTE(review): both digit runs are optional, so a lone "." (or "+.") also matches -- confirm this is intended. */
80 Float [+-]?[0-9]*[.][0-9]*
/* A '/' followed by zero or more non-whitespace characters.
 * NOTE(review): only whitespace ends the match, so input such as "/Type/Page" or "/Kids[" is absorbed into a single Name -- verify against the PDF delimiter rules. */
82 Name [/]{NonWhiteSpace}*
/* '<', then any mix of {WhiteSpace} and hexadecimal digits, then '>'.  The {WhiteSpace} definition is not among the lines shown here. */
84 HexString [<]({WhiteSpace}|[0-9A-Fa-f])*[>]
/* A (possibly empty) run built from characters other than '(' and ')', or from a backslash followed by any character including newline.
 * NOTE(review): [^()] also accepts a backslash, so under longest-match the input \\( can be read as a plain backslash followed by an escaped '(' -- consider excluding the backslash from the character class. */
85 ButParentheses ([^()]|(\\(.|\n)))*
96 pdflval.plainInt = strtol( yytext, & end, 10 );
102 pdflval.pdfObj = new PDF_Float( strtod( yytext, & end ) );
107 /* Note that we don't parse the internal meaning of the bracketed contents, since this
108 * would only require more work when writing back to a pdf file.
111 const char * src( yytext + 1 );
116 for( ; isblank( *src ); ++src )
122 for( ; isblank( *src ); ++src )
128 str += hexToChar( c1, 0 );
131 str += hexToChar( c1, c2 );
132 for( ; isblank( *src ); ++src )
138 pdflval.pdfObj = new PDF_String( str );
140 pdflval.pdfObj = new PDF_HexString( yytext );
145 stringParenDepth = 1;
146 BEGIN( StringState );
151 pdflval.str = strdup( yytext );
156 if( stringParenDepth > 0 )
158 pdflval.str = strdup( yytext );
168 <StringState>{ButParentheses} {
169 /* Note that we don't parse the internal meaning of escape sequences within the string, since this
170 * would only require more work when writing back to a pdf file.
173 char * res( new char[ pdfleng + 1 ] ); // scratch buffer sized to the matched text plus one extra char; NOTE(review): declared as a raw `char *`, yet the next line calls `res.getPtr( )` -- the Name rule uses `RefCountPtr< char >` for the same pattern, confirm the intended type
174 char * dst( res.getPtr( ) ); // write cursor into the scratch buffer
175 const char * src( yytext ); // read cursor over the matched text
220 *(dst++) = octalToChar( c1, c2, c3 ); // three-digit octal sequence
224 *(dst++) = octalToChar( 0, c1, c2 ); // two-digit octal sequence; the absent leading digit is passed as 0 (NUL), not as the character '0'
230 *(dst++) = octalToChar( 0, 0, c1 ); // one-digit octal sequence; NOTE(review): octalToChar must treat NUL as digit zero for this call to decode correctly
251 pdflval.str = strdup( yytext );
255 true { pdflval.pdfObj = new PDF_Boolean( true ); return T_Constant; /* boolean literal: allocates a PDF_Boolean, reported as T_Constant */ }
256 false { pdflval.pdfObj = new PDF_Boolean( false ); return T_Constant; /* boolean literal: allocates a PDF_Boolean, reported as T_Constant */ }
257 null { pdflval.pdfObj = new PDF_Null( ); return T_Constant; /* null literal: allocates a PDF_Null, also reported as T_Constant */ }
258 obj { return T_obj; /* keyword token; no semantic value is set */ }
259 endobj { return T_endobj; /* keyword token; no semantic value is set */ }
260 stream("\r\n"|"\n") { return T_stream; /* matches only together with the CRLF or LF that directly follows the keyword */ }
261 endstream { return T_endstream; /* keyword token; no semantic value is set */ }
264 "<<" { return T_OpenDic; /* dictionary opening delimiter */ }
265 ">>" { return T_CloseDic; /* dictionary closing delimiter */ }
272 /* Note that we don't parse the internal meaning of #-sequences within the name, since this
273 * would only require more work when writing back to a pdf file.
276 const char * src( yytext + 1 ); // read cursor starting one character into the match (presumably past the leading '/')
277 RefCountPtr< char > dstMem( new char[ pdflength + 1 ] ); // NOTE(review): `pdflength` is not declared in any visible line; the string rule sizes its buffer with `pdfleng` -- likely a typo, confirm
278 char * dst( dstMem.getPtr( ) ); // write cursor into the decode buffer
280 for( c = *(src++); ! isblank( c ); c = *(src++) ) // NOTE(review): stops only on a blank; the Name pattern admits no whitespace and isblank is false for the terminating NUL of yytext -- confirm what ends this loop
288 (dst++) = hexToChar( c1, c2 ); // NOTE(review): assigns to the pointer expression itself; the string rule writes `*(dst++) = ...`, so the dereference looks missing
296 pdflval.pdfObj = new PDF_Name( dstMem.getPtr( ) ); // name object built from the decode buffer
298 pdflval.pdfObj = new PDF_Name( yytext + 1 ); // name object built from the raw match minus its first character; NOTE(review): this and the assignment above cannot both be live -- presumably one path is disabled outside the visible lines
302 <INITIAL>[ \t\n\r]+ ; /* runs of space, tab, newline or carriage return in start condition INITIAL are discarded: the action is empty */
304 . { throw( "Page scanner found unrecognized token" ); /* fallback for any single non-newline character no other rule claims; throws a string literal (const char *) */ }
307 /* The closing %% above marks the end of the Rules section and the beginning
308 * of the User Subroutines section. All text from here to the end of the
309 * file is copied verbatim to the end of the generated lex.pdf.c file.
310 * This section is where you put definitions of helper functions.
// hexToChar: combines two hexadecimal digit characters into one byte.
// c1 supplies the high nibble (its digit value times 16) and c2 the low nibble.
// The visible statements cover digits offset from '0', from 'A' and from 'a'; the range tests that
// select between them and the return statement are not among the lines shown here.
// NOTE(review): the hex-string rule calls hexToChar( c1, 0 ) for a lone trailing digit, so a NUL c2
// presumably contributes nothing -- confirm against the hidden conditions.
314 unsigned char hexToChar( char c1, char c2 )
316 unsigned char res( 0 ); // accumulates the decoded byte
319 res += 16 * static_cast< unsigned char >( c1 - '0' ); // high nibble, decimal digit
323 res += 16 * static_cast< unsigned char >( c1 - 'A' + 10 ); // high nibble, upper-case letter digit
327 res += 16 * static_cast< unsigned char >( c1 - 'a' + 10 ); // high nibble, lower-case letter digit
332 res += static_cast< unsigned char >( c2 - '0' ); // low nibble, decimal digit
336 res += static_cast< unsigned char >( c2 - 'A' + 10 ); // low nibble, upper-case letter digit
340 res += static_cast< unsigned char >( c2 - 'a' + 10 ); // low nibble, lower-case letter digit
/* Converts up to three octal digit characters into the byte they encode.
 *
 * c1, c2 and c3 are the digits from most to least significant; the result is
 * 64*d1 + 8*d2 + d3 narrowed to unsigned char.
 *
 * The scanner passes a literal 0 (NUL) in place of absent leading digits, as
 * in octalToChar( 0, 0, c1 ).  Subtracting '0' from NUL and narrowing to
 * unsigned char yields 208; that cancels for the 64-weight (64 * 208 is a
 * multiple of 256) but contributes 128 for the 8-weight, so one-digit
 * sequences used to come out 128 too large.  NUL is therefore treated as
 * digit zero.  Arguments that are real digit characters give exactly the same
 * result as before.
 */
unsigned char octalToChar( char c1, char c2, char c3 )
{
	/* Digit value of each position; NUL stands for "digit not present". */
	const unsigned char d1( c1 == 0 ? 0 : static_cast< unsigned char >( c1 - '0' ) );
	const unsigned char d2( c2 == 0 ? 0 : static_cast< unsigned char >( c2 - '0' ) );
	const unsigned char d3( c3 == 0 ? 0 : static_cast< unsigned char >( c3 - '0' ) );

	/* The sum is formed in int and narrowed on return, as in the original expression. */
	return static_cast< unsigned char >( 64 * d1 + 8 * d2 + d3 );
}