1 /* This file is part of Shapes.
3 * Shapes is free software: you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation, either version 3 of the License, or
6 * (at your option) any later version.
8 * Shapes is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with Shapes. If not, see <http://www.gnu.org/licenses/>.
16 * Copyright 2008 Henrik Tidefelt
17 */
24 #include "simplepdfi.h"
25 //#include "globals.h"
26 #include "pdfscanner.h"
29 using namespace SimplePDF
;
// PDF_in::follow< PDF_Object >: resolves a possibly indirect object.
// Takes a RefCountPtr< PDF_Object > and returns a RefCountPtr< PDF_Object >.
// NOTE(review): the branch taken when the dynamic_cast below yields null is
// not visible in this view -- presumably maybeIndirect is handed back
// unchanged; confirm.
34 RefCountPtr
< PDF_Object
>
35 SimplePDF::PDF_in::follow
< PDF_Object
>( RefCountPtr
< PDF_Object
> maybeIndirect
)
// Probe whether the argument is a PDF_Indirect.
37 PDF_Indirect
* tmp( dynamic_cast< PDF_Indirect
* >( maybeIndirect
.getPtr( ) ) );
// Load the object identified by the reference's i and v members and follow the
// result again, so a reference that points at another reference is resolved too.
42 return follow
< PDF_Object
>( readObjectNumbered( tmp
->i
, tmp
->v
) );
// PDF_in::follow< PDF_Float >: resolves a possibly indirect object and returns
// it as a RefCountPtr< PDF_Float >.
// Visible behaviour: the argument is tried as PDF_Float, then as PDF_Int (which
// is converted to a freshly allocated PDF_Float); if a downcast fails a C
// string is thrown; an indirect reference is loaded and followed recursively.
// NOTE(review): the conditions and braces that separate these paths are not
// visible in this view; the order above is the order of the visible statements.
46 RefCountPtr
< PDF_Float
>
47 SimplePDF::PDF_in::follow
< PDF_Float
>( RefCountPtr
< PDF_Object
> maybeIndirect
)
// Probe whether the argument is a PDF_Indirect.
49 PDF_Indirect
* tmp( dynamic_cast< PDF_Indirect
* >( maybeIndirect
.getPtr( ) ) );
// First attempt: the object already is a PDF_Float.
53 RefCountPtr
< PDF_Float
> res( maybeIndirect
.down_cast
< PDF_Float
>( ) );
54 if( res
!= NullPtr
< PDF_Float
>( ) )
// Second attempt: accept a PDF_Int and promote its value to a new PDF_Float.
60 RefCountPtr
< PDF_Int
> res( maybeIndirect
.down_cast
< PDF_Int
>( ) );
61 if( res
!= NullPtr
< PDF_Int
>( ) )
63 return RefCountPtr
< PDF_Float
>( new PDF_Float( res
->value( ) ) );
// NOTE(review): this throws a string literal (const char *), so a handler
// written as catch( const std::string & ) will not intercept it.
66 throw( "Downcast in PDF_in::follow failed" );
// Indirect case: load the object identified by tmp->i / tmp->v and retry.
68 return follow
< PDF_Float
>( readObjectNumbered( tmp
->i
, tmp
->v
) );
// Constructs a page iterator from a PDF_in and a page number.
// The initializer list sets pageNo from _pageNo and in from _in.
// NOTE(review): the constructor body is not visible in this view.
74 SimplePDF::PDF_in::PageIterator::PageIterator( SimplePDF::PDF_in
& _in
, int _pageNo
)
75 : pageNo( _pageNo
), in( _in
)
// Copy constructor: takes both pageNo and in from orig.
// NOTE(review): the constructor body is not visible in this view.
78 SimplePDF::PDF_in::PageIterator::PageIterator( const PageIterator
& orig
)
79 : pageNo( orig
.pageNo
), in( orig
.in
)
// Copy assignment from another PageIterator; returns a PageIterator reference.
// NOTE(review): the body is not visible in this view, so it cannot be told from
// here which members are assigned or whether iterators over different PDF_in
// objects are rejected -- confirm against the full source.
82 SimplePDF::PDF_in::PageIterator
&
83 SimplePDF::PDF_in::PageIterator::operator = ( const PageIterator
& orig
)
// Equality: two iterators are equal when their pageNo members are equal.
// The in member takes no part in the comparison.
91 SimplePDF::PDF_in::PageIterator::operator == ( const PageIterator
& i2
) const
93 return pageNo
== i2
.pageNo
;
// Inequality, defined as the negation of operator == on the same argument.
97 SimplePDF::PDF_in::PageIterator::operator != ( const PageIterator
& i2
) const
99 return ! operator == ( i2
);
// Dereference: returns the RefCountPtr< PDF_Dictionary > that in.getPage
// yields for the current pageNo.
102 RefCountPtr
< PDF_Dictionary
>
103 SimplePDF::PDF_in::PageIterator::operator * ( )
105 return in
.getPage( pageNo
);
// Prefix increment.  Declared to return a PageIterator by value.
// NOTE(review): the body is not visible in this view.
108 SimplePDF::PDF_in::PageIterator
109 SimplePDF::PDF_in::PageIterator::operator ++ ( )
// Prefix decrement.  Declared to return a PageIterator by value.
// NOTE(review): the body is not visible in this view.
115 SimplePDF::PDF_in::PageIterator
116 SimplePDF::PDF_in::PageIterator::operator -- ( )
// Postfix increment (the unnamed int parameter selects the postfix form).
122 SimplePDF::PDF_in::PageIterator
123 SimplePDF::PDF_in::PageIterator::operator ++ ( int )
// Snapshot of the iterator taken before any modification.
// NOTE(review): the update and the return statement are not visible in this
// view -- presumably old is what gets returned; confirm.
125 SimplePDF::PDF_in::PageIterator
old( *this );
// Postfix decrement (the unnamed int parameter selects the postfix form).
130 SimplePDF::PDF_in::PageIterator
131 SimplePDF::PDF_in::PageIterator::operator -- ( int )
// Snapshot of the iterator taken before any modification.
// NOTE(review): the update and the return statement are not visible in this
// view -- presumably old is what gets returned; confirm.
133 SimplePDF::PDF_in::PageIterator
old( *this );
// Compound addition of an int offset; returns a PageIterator reference.
138 SimplePDF::PDF_in::PageIterator
&
139 SimplePDF::PDF_in::PageIterator::operator += ( int diff
)
// Forwards to operator -= with the sign of the offset flipped.
// NOTE(review): the condition selecting this path and the direct update of the
// page number are not visible in this view -- presumably only negative offsets
// are forwarded.  operator -= forwards back to operator += in the same way, so
// the guard must ensure the two cannot call each other indefinitely; confirm.
147 return operator -= ( -diff
);
// Compound subtraction of an int offset; returns a PageIterator reference.
153 SimplePDF::PDF_in::PageIterator
&
154 SimplePDF::PDF_in::PageIterator::operator -= ( int diff
)
// Forwards to operator += with the sign of the offset flipped.
// NOTE(review): the condition selecting this path and the direct update of the
// page number are not visible in this view -- presumably only negative offsets
// are forwarded.  operator += forwards back to operator -= in the same way, so
// the guard must ensure the two cannot call each other indefinitely; confirm.
162 return operator += ( -diff
);
// Constructs a PDF reader on top of an input stream.
// Visible steps: find the "startxref" keyword near the end of the stream, read
// the offset that follows it, jump to the cross-reference section, skip over
// its entries to the "trailer" keyword, parse the trailer dictionary, and
// finally resolve Root -> Pages -> Kids into the pages member.
// Errors are reported by throwing.
// NOTE(review): if str is a std::string, the "startxref" failure throws a
// std::string (literal + str) while the other visible throws use bare string
// literals (const char *); callers need handlers for both -- confirm.
// NOTE(review): several conditions, reads into str and the throw that uses msg
// are not visible in this view.
169 SimplePDF::PDF_in::PDF_in( RefCountPtr
< istream
> _is
)
// isPtr caches the raw pointer held by is; resources starts out as a new,
// empty PDF_Dictionary.
170 : is( _is
), isPtr( is
.getPtr( ) ),
171 resources( new PDF_Dictionary
)
// Walk backwards from the end of the stream, starting 6 bytes before the end.
// NOTE(review): the loop has no condition in its header; the statement that
// leaves the loop is not visible in this view.
174 for( int i( -6 ); ; --i
)
176 isPtr
->seekg( i
, ios::end
);
// Reposition 8 bytes before the offset the loop stopped at; the word found
// there is required to be "startxref".
181 isPtr
->seekg( i
- 8, ios::end
);
183 if( str
!= "startxref" )
185 throw "Expected \"startxref\", found " + str
;
// Read the byte offset of the cross-reference section and jump to it
// (measured from the beginning of the stream).
193 (*isPtr
) >> xrefTmp
;
194 isPtr
->seekg( xrefTmp
, ios::beg
);
// Diagnostic text naming the offset at which "xref" was expected.
199 std::ostringstream msg
;
200 msg
<< "Expected \"xref\" at " << xrefTmp
;
209 throw "Expected a 0 before size of xref.";
// Read the number of entries, remember in xref the stream position reached
// after reading that number, then skip xrefSize entries of 20 bytes each.
212 (*isPtr
) >> xrefSize
;
213 xref
= isPtr
->tellg( );
215 isPtr
->seekg( xref
+ 20 * xrefSize
);
217 if( str
!= "trailer" )
219 throw "Expected \"trailer\" at this point.";
// Parse what follows the keyword; it must turn out to be a PDF_Dictionary.
222 RefCountPtr
< PDF_Object
> trailerMem( parse( ) );
223 PDF_Dictionary
* trailer( dynamic_cast< PDF_Dictionary
* >( trailerMem
.getPtr( ) ) );
226 throw "Failed to parse the trailer dictionary.";
// The Root entry of the trailer is required to be an indirect reference.
228 RefCountPtr
< PDF_Object
> rootMem( trailer
->dic
[ "Root" ] );
229 PDF_Indirect
* indirectRoot( dynamic_cast< PDF_Indirect
* >( rootMem
.getPtr( ) ) );
230 if( indirectRoot
== 0)
232 throw "I believe the Root field of the trailer dictionary is an indirect object...";
// pages = the "Kids" vector of the "Pages" dictionary of the "Root"
// dictionary, with every step resolved through follow.
234 pages
= follow
<PDF_Vector
>( follow
<PDF_Dictionary
>( follow
<PDF_Dictionary
>( trailer
->operator[]( "Root" ) )->operator[]( "Pages" ) )->operator[]( "Kids" ) );
// Destructor.
// NOTE(review): the body is not visible in this view.
237 SimplePDF::PDF_in::~PDF_in( )
// Cross-reference lookup for the object addressed by ( i, v ).
// Visible behaviour: a string literal is thrown for an out-of-bounds index
// (the test itself is not visible), otherwise the stream is positioned at
// xref + 20 * i from the beginning, i.e. entries are addressed as consecutive
// 20-byte records starting at the position stored in xref.
// NOTE(review): the return type, the code that reads the entry, and any use of
// v are not visible in this view.
242 SimplePDF::PDF_in::xreflookup( size_t i
, size_t v
)
246 throw( "xref index out of bounds" );
248 isPtr
->seekg( xref
+ 20 * i
, ios::beg
);
// Reads an object at an absolute stream position; returns a
// RefCountPtr< PDF_Object >.
// NOTE(review): only the seek is visible in this view -- presumably the object
// is then produced by the parser; confirm.
254 RefCountPtr
< PDF_Object
> PDF_in::readObjectAt( streamoff pos
)
// pos is interpreted relative to the beginning of the stream.
256 isPtr
->seekg( pos
, ios::beg
);
// Reads the object addressed by ( i, v ): the pair is translated by xreflookup
// and the result is passed to readObjectAt.
// A std::string raised along the way is caught and a longer message is
// composed that names the object and its byte offset.
// NOTE(review): the handler only matches std::string; string literals thrown
// as const char * pass through it untouched.
// NOTE(review): what is done with the composed message (presumably it is
// rethrown) is not visible in this view.
260 RefCountPtr
< PDF_Object
> PDF_in::readObjectNumbered( size_t i
, size_t v
)
264 return readObjectAt( xreflookup( i
, v
) );
266 catch( const std::string
& ball
)
268 std::ostringstream oss
;
// xreflookup is called a second time here, only to report the offset.
269 oss
<< "While parsing object " << i
<< " " << v
<< " at byte offset " << xreflookup( i
, v
)
270 << ", the following error occurred: " << ball
;
// Token-driven object parser returning a RefCountPtr< PDF_Object >.  (The
// line carrying the function name is not visible in this view; the error text
// at the end of the function identifies it as PDF_in::parse.)
//
// A PdfScanner reads tokens from isPtr.  Two stacks are maintained in step:
// tokenStack holds the kind of each pending token and objectStack the
// PdfScanner::UnionType value that belongs to it.  Closing constructs (end of
// object, stream, end of vector, end of dictionary) pop their operands off
// both stacks and push back a single T_Constant entry holding the new object.
// Malformed input is reported by throwing std::string.
//
// NOTE(review): the loop header and all case labels are not visible in this
// view; the comments below name a branch by what its visible statements do.
275 RefCountPtr
< PDF_Object
>
278 PdfScanner
pdfscanner( isPtr
);
279 vector
< PdfScanner::UnionType
> objectStack
;
280 vector
< int > tokenStack
;
// Accumulates the pieces of a string that arrives in several tokens.
281 ostringstream stringMem
;
// Placeholder pushed on objectStack for tokens that carry no value.
282 PdfScanner::UnionType dummyVal
;
285 int token( pdfscanner
.yylex( ) );
// Token without a value: keep the stacks aligned with a placeholder.
290 objectStack
.push_back( dummyVal
);
291 tokenStack
.push_back( token
);
// Token with a value: push the scanner's yylval together with the token kind.
296 objectStack
.push_back( pdfscanner
.yylval
);
297 tokenStack
.push_back( token
);
// Value whose pdfR member needs a back pointer to this reader; it is recorded
// as a T_Constant.
300 objectStack
.push_back( pdfscanner
.yylval
);
301 objectStack
.back( ).pdfR
->PDFin
= this;
302 tokenStack
.push_back( T_Constant
);
// String fragment: append it to stringMem and free the scanner's copy.
308 stringMem
<< *pdfscanner
.yylval
.str
;
309 delete( pdfscanner
.yylval
.str
);
// Turn the accumulated text into a PDF_LiteralString constant.
313 objectStack
.push_back( PdfScanner::UnionType( ) );
314 objectStack
.back( ).pdfObj
= new PDF_LiteralString( stringMem
.str( ).c_str( ) );
315 tokenStack
.push_back( T_Constant
);
// End of an object: the top of the stack must be a complete value.  One
// visible branch wraps pdfObj directly, another builds a PDF_Name from str,
// anything else is an error.
319 RefCountPtr
< PDF_Object
> res
;
320 switch( tokenStack
.back( ) )
323 res
= RefCountPtr
< PDF_Object
>( objectStack
.back( ).pdfObj
);
326 res
= RefCountPtr
< PDF_Object
>( new PDF_Name( objectStack
.back( ).str
) );
329 throw( string( "Expected a complete object before endobj." ) );
331 objectStack
.pop_back( );
332 tokenStack
.pop_back( );
// Directly beneath the value there must be the T_obj marker.
334 if( tokenStack
.back( ) != T_obj
)
336 throw( string( "There wasn't exactly 1 object contained within obj...endobj." ) );
338 tokenStack
.pop_back( );
// NOTE(review): unchecked reinterpret_cast of the stored pdfObj pointer; how
// objRef is used afterwards is not visible in this view.
339 PDF_Indirect
* objRef( reinterpret_cast< PDF_Indirect
* >( objectStack
.back( ).pdfObj
) );
340 objectStack
.pop_back( );
// Start of stream data: the value on top of the stack must be the stream's
// dictionary.
348 PDF_Dictionary
* streamDic( dynamic_cast< PDF_Dictionary
* >( objectStack
.back( ).pdfObj
) );
349 objectStack
.pop_back( );
350 tokenStack
.pop_back( );
353 throw( string( "Missing stream dictionary" ) );
// The stream object receives its dictionary, the input stream and the current
// read position, and replaces the dictionary on the stack as a T_Constant.
355 objectStack
.push_back( PdfScanner::UnionType( ) );
356 objectStack
.back( ).pdfObj
= new PDF_Stream_in( streamDic
, isPtr
, isPtr
->tellg( ) );
357 tokenStack
.push_back( T_Constant
);
// Save the read position before asking for the length, then skip over exactly
// that many bytes of stream data.
359 /* streamDic->getLength( ) may destroy the get position! */
360 streamoff tmp
= isPtr
->tellg( );
361 size_t length
= streamDic
->getLength( );
362 isPtr
->seekg( tmp
+ length
, ios::beg
);
// Restart the scanner on the repositioned stream; the very next token must be
// the end-of-stream keyword.
365 pdfscanner
.yyrestart( isPtr
);
366 token
= pdfscanner
.yylex( );
367 if( token
!= T_endstream
)
369 throw( string( "Stream dictionary didn't tell the right length of the stream." ) );
// An end-of-stream token that was not consumed by the stream branch above.
374 throw( string( "Isolated endstream encountered" ) );
// End of a vector: pop entries until the '[' marker.  push_front is used so
// that tmpList ends up in the order the elements appeared in the input.
378 list
< PDF_Object
* > tmpList
;
379 int popToken( tokenStack
.back( ) );
380 while( popToken
!= '[' )
385 tmpList
.push_front( objectStack
.back( ).pdfObj
);
388 tmpList
.push_front( new PDF_Name( objectStack
.back( ).str
) );
391 throw( string( "Expected only constant values when closing vector" ) );
393 tokenStack
.pop_back( );
394 objectStack
.pop_back( );
396 popToken
= tokenStack
.back( );
// Drop the '[' marker itself.
399 tokenStack
.pop_back( );
400 objectStack
.pop_back( );
// Move the collected elements into a new PDF_Vector and push it as a constant.
402 PDF_Vector
* newVec( new PDF_Vector( ) );
403 newVec
->vec
.reserve( tmpList
.size( ) );
404 for( list
< PDF_Object
* >::iterator
i( tmpList
.begin( ) ); i
!= tmpList
.end( ); ++i
)
406 newVec
->vec
.push_back( RefCountPtr
< PDF_Object
>( *i
) );
409 tokenStack
.push_back( T_Constant
);
410 objectStack
.push_back( PdfScanner::UnionType( ) );
411 objectStack
.back( ).pdfObj
= newVec
;
// End of a dictionary: pop ( name, value ) pairs, value first, until the
// T_OpenDic marker.
416 PDF_Dictionary
* newDic( new PDF_Dictionary( ) );
417 int popToken( tokenStack
.back( ) );
418 while( popToken
!= T_OpenDic
)
// The value: either a stored object or a name that is wrapped in a PDF_Name.
424 theObj
= objectStack
.back( ).pdfObj
;
427 theObj
= new PDF_Name( objectStack
.back( ).str
);
430 throw( string( "Expected constant value at this position when closing dictionary" ) );
432 tokenStack
.pop_back( );
433 objectStack
.pop_back( );
// The key beneath the value must be a name.
435 popToken
= tokenStack
.back( );
436 if( popToken
!= T_Name
)
438 throw( string( "Expected name at this position when closing dictionary" ) );
440 char * theName
= objectStack
.back( ).str
;
441 newDic
->dic
[ theName
] = RefCountPtr
< PDF_Object
>( theObj
);
443 tokenStack
.pop_back( );
444 objectStack
.pop_back( );
446 popToken
= tokenStack
.back( );
// Drop the T_OpenDic marker itself.
448 tokenStack
.pop_back( );
449 objectStack
.pop_back( );
// Nothing left on the stack: the dictionary is the whole result of this call
// and is returned right away.
451 if( objectStack
.empty( ) )
453 return RefCountPtr
< PDF_Object
>( newDic
);
// Otherwise the dictionary is an operand of an enclosing construct.
455 tokenStack
.push_back( T_Constant
);
456 objectStack
.push_back( PdfScanner::UnionType( ) );
457 objectStack
.back( ).pdfObj
= newDic
;
461 throw( string( "Unrecognized token type." ) );
// NOTE(review): unlike the std::string errors above, this one throws a string
// literal (const char *).
464 throw( "Internal error in PDF_in::parse: Infinite loop should not be broken, only returned from." );
// Returns an iterator over this document positioned at page number 0.
467 SimplePDF::PDF_in::PageIterator
468 SimplePDF::PDF_in::beginPages( )
470 return SimplePDF::PDF_in::PageIterator( *this, 0 );
// Returns the past-the-end iterator: its page number equals getPageCount( ).
// Note that every call recomputes the page count.
473 SimplePDF::PDF_in::PageIterator
474 SimplePDF::PDF_in::endPages( )
476 return SimplePDF::PDF_in::PageIterator( *this, getPageCount( ) );
// Counts the pages of the document by walking the entries of pages->vec.
// Each entry is resolved to a dictionary through follow; for an entry whose
// isPages( ) is true, its getCount( ) is added to the running total.
// NOTE(review): the return type, the declaration of count, the handling of
// entries that are not Pages nodes (presumably one page each) and the return
// statement are not visible in this view.
480 SimplePDF::PDF_in::getPageCount( )
// typeof is a GNU extension, used here to name the container type of vec.
483 typedef typeof pages
->vec ListType
;
484 for( ListType::const_iterator i
= pages
->vec
.begin( ); i
!= pages
->vec
.end( ); ++i
)
486 RefCountPtr
< PDF_Dictionary
> kid( follow
< PDF_Dictionary
>( *i
) );
487 if( kid
->isPages( ) )
489 count
+= kid
->getCount( );
// Returns the dictionary of the page with the given number.
// The search starts at the top-level pages vector.  Each entry is resolved to
// a dictionary; for an entry whose isPages( ) is true its getCount( ) is read,
// and the search can continue in that entry's "Kids" vector.
// A string literal (const char *) is thrown when the page number is out of
// range.
// NOTE(review): the outer loop, the comparisons of pageNo against count, the
// use of doAgain and the return statement are not visible in this view.
500 RefCountPtr
< PDF_Dictionary
>
501 SimplePDF::PDF_in::getPage( size_t pageNo
)
503 RefCountPtr
< PDF_Vector
> kids
= pages
;
506 bool doAgain
= false;
// typeof is a GNU extension, used here to name the container type of vec.
507 typedef typeof kids
->vec ListType
;
508 for( ListType::const_iterator i
= kids
->vec
.begin( ); i
!= kids
->vec
.end( ); ++i
)
510 RefCountPtr
< PDF_Dictionary
> kid( follow
< PDF_Dictionary
>( *i
) );
511 if( kid
->isPages( ) )
513 size_t count
= kid
->getCount( );
// Descend: replace the vector being searched with this node's children.
516 kids
= follow
< PDF_Vector
>( (*kid
)[ "Kids" ] );
539 throw( "Page number out of range" );
542 throw( "Internal error in PDF_in::getPage: Infinite loop should not be broken, only returned from." );