Updating the changelog in the VERSION file, and version_sync.
[shapes.git] / source / simplepdfi.cc
blob5e1a9bcb5381c1dbb9da55944419b3ee1dede7fd
1 /* This file is part of Shapes.
3 * Shapes is free software: you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation, either version 3 of the License, or
6 * any later version.
8 * Shapes is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with Shapes. If not, see <http://www.gnu.org/licenses/>.
16 * Copyright 2008 Henrik Tidefelt
19 #include <iomanip>
20 #include <string>
21 #include <ctype.h>
22 #include <vector>
24 #include "simplepdfi.h"
25 //#include "globals.h"
26 #include "pdfscanner.h"
28 using namespace std;
29 using namespace SimplePDF;
31 namespace SimplePDF
33 template< >
34 RefCountPtr< PDF_Object >
35 SimplePDF::PDF_in::follow< PDF_Object >( RefCountPtr< PDF_Object > maybeIndirect )
37 PDF_Indirect * tmp( dynamic_cast< PDF_Indirect * >( maybeIndirect.getPtr( ) ) );
38 if( tmp == 0 )
40 return maybeIndirect;
42 return follow< PDF_Object >( readObjectNumbered( tmp->i, tmp->v ) );
45 template< >
46 RefCountPtr< PDF_Float >
47 SimplePDF::PDF_in::follow< PDF_Float >( RefCountPtr< PDF_Object > maybeIndirect )
49 PDF_Indirect * tmp( dynamic_cast< PDF_Indirect * >( maybeIndirect.getPtr( ) ) );
50 if( tmp == 0 )
53 RefCountPtr< PDF_Float > res( maybeIndirect.down_cast< PDF_Float >( ) );
54 if( res != NullPtr< PDF_Float >( ) )
56 return res;
60 RefCountPtr< PDF_Int > res( maybeIndirect.down_cast< PDF_Int >( ) );
61 if( res != NullPtr< PDF_Int >( ) )
63 return RefCountPtr< PDF_Float >( new PDF_Float( res->value( ) ) );
66 throw( "Downcast in PDF_in::follow failed" );
68 return follow< PDF_Float >( readObjectNumbered( tmp->i, tmp->v ) );
74 SimplePDF::PDF_in::PageIterator::PageIterator( SimplePDF::PDF_in & _in, int _pageNo )
75 : pageNo( _pageNo ), in( _in )
76 { }
78 SimplePDF::PDF_in::PageIterator::PageIterator( const PageIterator & orig )
79 : pageNo( orig.pageNo ), in( orig.in )
80 { }
82 SimplePDF::PDF_in::PageIterator &
83 SimplePDF::PDF_in::PageIterator::operator = ( const PageIterator & orig )
85 pageNo = orig.pageNo;
86 in = orig.in;
87 return *this;
90 bool
91 SimplePDF::PDF_in::PageIterator::operator == ( const PageIterator & i2 ) const
93 return pageNo == i2.pageNo;
96 bool
97 SimplePDF::PDF_in::PageIterator::operator != ( const PageIterator & i2 ) const
99 return ! operator == ( i2 );
102 RefCountPtr< PDF_Dictionary >
103 SimplePDF::PDF_in::PageIterator::operator * ( )
105 return in.getPage( pageNo );
108 SimplePDF::PDF_in::PageIterator
109 SimplePDF::PDF_in::PageIterator::operator ++ ( )
111 operator += ( 1 );
112 return *this;
115 SimplePDF::PDF_in::PageIterator
116 SimplePDF::PDF_in::PageIterator::operator -- ( )
118 operator -= ( 1 );
119 return *this;
122 SimplePDF::PDF_in::PageIterator
123 SimplePDF::PDF_in::PageIterator::operator ++ ( int )
125 SimplePDF::PDF_in::PageIterator old( *this );
126 operator += ( 1 );
127 return old;
130 SimplePDF::PDF_in::PageIterator
131 SimplePDF::PDF_in::PageIterator::operator -- ( int )
133 SimplePDF::PDF_in::PageIterator old( *this );
134 operator -= ( 1 );
135 return old;
138 SimplePDF::PDF_in::PageIterator &
139 SimplePDF::PDF_in::PageIterator::operator += ( int diff )
141 if( diff == 0 )
143 return *this;
145 if( diff < 0 )
147 return operator -= ( -diff );
149 pageNo += diff;
150 return *this;
153 SimplePDF::PDF_in::PageIterator &
154 SimplePDF::PDF_in::PageIterator::operator -= ( int diff )
156 if( diff == 0 )
158 return *this;
160 if( diff < 0 )
162 return operator += ( -diff );
164 pageNo -= diff;
165 return *this;
169 SimplePDF::PDF_in::PDF_in( RefCountPtr< istream > _is )
170 : is( _is ), isPtr( is.getPtr( ) ),
171 resources( new PDF_Dictionary )
173 string str;
174 for( int i( -6 ); ; --i )
176 isPtr->seekg( i, ios::end );
177 char c;
178 isPtr->get( c );
179 if( c == 'f' )
181 isPtr->seekg( i - 8, ios::end );
182 (*isPtr) >> str ;
183 if( str != "startxref" )
185 throw "Expected \"startxref\", found " + str;
187 break;
192 streamoff xrefTmp;
193 (*isPtr) >> xrefTmp ;
194 isPtr->seekg( xrefTmp, ios::beg );
195 string tmp;
196 (*isPtr) >> tmp ;
197 if( tmp != "xref" )
199 std::ostringstream msg;
200 msg << "Expected \"xref\" at " << xrefTmp ;
201 throw msg.str( );
205 int i;
206 (*isPtr) >> i ;
207 if( i != 0 )
209 throw "Expected a 0 before size of xref.";
212 (*isPtr) >> xrefSize ;
213 xref = isPtr->tellg( );
215 isPtr->seekg( xref + 20 * xrefSize );
216 (*isPtr) >> str ;
217 if( str != "trailer" )
219 throw "Expected \"trailer\" at this point.";
222 RefCountPtr< PDF_Object > trailerMem( parse( ) );
223 PDF_Dictionary * trailer( dynamic_cast< PDF_Dictionary * >( trailerMem.getPtr( ) ) );
224 if( trailer == 0 )
226 throw "Failed to parse the trailer dictionary.";
228 RefCountPtr< PDF_Object > rootMem( trailer->dic[ "Root" ] );
229 PDF_Indirect * indirectRoot( dynamic_cast< PDF_Indirect * >( rootMem.getPtr( ) ) );
230 if( indirectRoot == 0)
232 throw "I believe the Root field of the trailer dictionary is an indirect object...";
234 pages = follow<PDF_Vector>( follow<PDF_Dictionary>( follow<PDF_Dictionary>( trailer->operator[]( "Root" ) )->operator[]( "Pages" ) )->operator[]( "Kids" ) );
237 SimplePDF::PDF_in::~PDF_in( )
241 streamoff
242 SimplePDF::PDF_in::xreflookup( size_t i, size_t v )
244 if( i >= xrefSize )
246 throw( "xref index out of bounds" );
248 isPtr->seekg( xref + 20 * i, ios::beg );
249 streamoff res;
250 (*isPtr) >> res;
251 return res;
254 RefCountPtr< PDF_Object > PDF_in::readObjectAt( streamoff pos )
256 isPtr->seekg( pos, ios::beg );
257 return parse( );
260 RefCountPtr< PDF_Object > PDF_in::readObjectNumbered( size_t i, size_t v )
264 return readObjectAt( xreflookup( i, v ) );
266 catch( const std::string & ball )
268 std::ostringstream oss;
269 oss << "While parsing object " << i << " " << v << " at byte offset " << xreflookup( i, v )
270 << ", the following error occurred: " << ball ;
271 throw oss.str( );
275 RefCountPtr< PDF_Object >
276 PDF_in::parse( )
278 PdfScanner pdfscanner( isPtr );
279 vector< PdfScanner::UnionType > objectStack;
280 vector< int > tokenStack;
281 ostringstream stringMem;
282 PdfScanner::UnionType dummyVal;
283 while( true )
285 int token( pdfscanner.yylex( ) );
286 switch( token )
288 case T_OpenDic:
289 case '[':
290 objectStack.push_back( dummyVal );
291 tokenStack.push_back( token );
292 break;
293 case T_obj:
294 case T_Constant:
295 case T_Name:
296 objectStack.push_back( pdfscanner.yylval );
297 tokenStack.push_back( token );
298 break;
299 case T_R:
300 objectStack.push_back( pdfscanner.yylval );
301 objectStack.back( ).pdfR->PDFin = this;
302 tokenStack.push_back( T_Constant );
303 case '(':
304 stringMem.str( "" );
305 break;
306 case T_String:
308 stringMem << *pdfscanner.yylval.str ;
309 delete( pdfscanner.yylval.str );
311 break;
312 case ')':
313 objectStack.push_back( PdfScanner::UnionType( ) );
314 objectStack.back( ).pdfObj = new PDF_LiteralString( stringMem.str( ).c_str( ) );
315 tokenStack.push_back( T_Constant );
316 break;
317 case T_endobj:
319 RefCountPtr< PDF_Object > res;
320 switch( tokenStack.back( ) )
322 case T_Constant:
323 res = RefCountPtr< PDF_Object >( objectStack.back( ).pdfObj );
324 break;
325 case T_Name:
326 res = RefCountPtr< PDF_Object >( new PDF_Name( objectStack.back( ).str ) );
327 break;
328 default:
329 throw( string( "Expected a complete object before endobj." ) );
331 objectStack.pop_back( );
332 tokenStack.pop_back( );
334 if( tokenStack.back( ) != T_obj )
336 throw( string( "There wasn't exactly 1 object contained within obj...endobj." ) );
338 tokenStack.pop_back( );
339 PDF_Indirect * objRef( reinterpret_cast< PDF_Indirect * >( objectStack.back( ).pdfObj ) );
340 objectStack.pop_back( );
342 delete objRef;
343 return res;
345 break;
346 case T_stream:
348 PDF_Dictionary * streamDic( dynamic_cast< PDF_Dictionary * >( objectStack.back( ).pdfObj ) );
349 objectStack.pop_back( );
350 tokenStack.pop_back( );
351 if( streamDic == 0 )
353 throw( string( "Missing stream dictionary" ) );
355 objectStack.push_back( PdfScanner::UnionType( ) );
356 objectStack.back( ).pdfObj = new PDF_Stream_in( streamDic, isPtr, isPtr->tellg( ) );
357 tokenStack.push_back( T_Constant );
359 /* streamDic->getLength( ) may destroy the get position! */
360 streamoff tmp = isPtr->tellg( );
361 size_t length = streamDic->getLength( );
362 isPtr->seekg( tmp + length, ios::beg );
364 delete streamDic;
365 pdfscanner.yyrestart( isPtr );
366 token = pdfscanner.yylex( );
367 if( token != T_endstream )
369 throw( string( "Stream dictionary didn't tell the right length of the stream." ) );
372 break;
373 case T_endstream:
374 throw( string( "Isolated endstream encountered" ) );
375 break;
376 case ']':
378 list< PDF_Object * > tmpList;
379 int popToken( tokenStack.back( ) );
380 while( popToken != '[' )
382 switch( popToken )
384 case T_Constant:
385 tmpList.push_front( objectStack.back( ).pdfObj );
386 break;
387 case T_Name:
388 tmpList.push_front( new PDF_Name( objectStack.back( ).str ) );
389 break;
390 default:
391 throw( string( "Expected only constant values when closing vector" ) );
393 tokenStack.pop_back( );
394 objectStack.pop_back( );
396 popToken = tokenStack.back( );
399 tokenStack.pop_back( );
400 objectStack.pop_back( );
402 PDF_Vector * newVec( new PDF_Vector( ) );
403 newVec->vec.reserve( tmpList.size( ) );
404 for( list< PDF_Object * >::iterator i( tmpList.begin( ) ); i != tmpList.end( ); ++i )
406 newVec->vec.push_back( RefCountPtr< PDF_Object >( *i ) );
409 tokenStack.push_back( T_Constant );
410 objectStack.push_back( PdfScanner::UnionType( ) );
411 objectStack.back( ).pdfObj = newVec;
413 break;
414 case T_CloseDic:
416 PDF_Dictionary * newDic( new PDF_Dictionary( ) );
417 int popToken( tokenStack.back( ) );
418 while( popToken != T_OpenDic )
420 PDF_Object * theObj;
421 switch( popToken )
423 case T_Constant:
424 theObj = objectStack.back( ).pdfObj;
425 break;
426 case T_Name:
427 theObj = new PDF_Name( objectStack.back( ).str );
428 break;
429 default:
430 throw( string( "Expected constant value at this position when closing dictionary" ) );
432 tokenStack.pop_back( );
433 objectStack.pop_back( );
435 popToken = tokenStack.back( );
436 if( popToken != T_Name )
438 throw( string( "Expected name at this position when closing dictionary" ) );
440 char * theName = objectStack.back( ).str;
441 newDic->dic[ theName ] = RefCountPtr< PDF_Object >( theObj );
442 delete theName;
443 tokenStack.pop_back( );
444 objectStack.pop_back( );
446 popToken = tokenStack.back( );
448 tokenStack.pop_back( );
449 objectStack.pop_back( );
451 if( objectStack.empty( ) )
453 return RefCountPtr< PDF_Object >( newDic );
455 tokenStack.push_back( T_Constant );
456 objectStack.push_back( PdfScanner::UnionType( ) );
457 objectStack.back( ).pdfObj = newDic;
459 break;
460 default:
461 throw( string( "Unrecognized token type." ) );
464 throw( "Internal error in PDF_in::parse: Infinite loop should not be broken, only returned from." );
467 SimplePDF::PDF_in::PageIterator
468 SimplePDF::PDF_in::beginPages( )
470 return SimplePDF::PDF_in::PageIterator( *this, 0 );
473 SimplePDF::PDF_in::PageIterator
474 SimplePDF::PDF_in::endPages( )
476 return SimplePDF::PDF_in::PageIterator( *this, getPageCount( ) );
479 size_t
480 SimplePDF::PDF_in::getPageCount( )
482 size_t count = 0;
483 typedef typeof pages->vec ListType;
484 for( ListType::const_iterator i = pages->vec.begin( ); i != pages->vec.end( ); ++i )
486 RefCountPtr< PDF_Dictionary > kid( follow< PDF_Dictionary >( *i ) );
487 if( kid->isPages( ) )
489 count += kid->getCount( );
491 else
493 ++count;
497 return count;
500 RefCountPtr< PDF_Dictionary >
501 SimplePDF::PDF_in::getPage( size_t pageNo )
503 RefCountPtr< PDF_Vector > kids = pages;
504 while( true )
506 bool doAgain = false;
507 typedef typeof kids->vec ListType;
508 for( ListType::const_iterator i = kids->vec.begin( ); i != kids->vec.end( ); ++i )
510 RefCountPtr< PDF_Dictionary > kid( follow< PDF_Dictionary >( *i ) );
511 if( kid->isPages( ) )
513 size_t count = kid->getCount( );
514 if( pageNo < count )
516 kids = follow< PDF_Vector >( (*kid)[ "Kids" ] );
517 doAgain = true;
518 break;
520 else
522 pageNo -= count;
525 else
527 if( pageNo == 0 )
529 return kid;
531 else
533 --pageNo;
537 if( ! doAgain )
539 throw( "Page number out of range" );
542 throw( "Internal error in PDF_in::getPage: Infinite loop should not be broken, only returned from." );