2 #include "directory.hpp"
3 #include "serialization.hpp"
9 #include <boost/iostreams/categories.hpp>
10 #include <boost/iostreams/copy.hpp>
11 #include <boost/iostreams/stream.hpp>
12 #include <boost/iostreams/stream_buffer.hpp>
13 #include <boost/iostreams/filter/symmetric.hpp>
14 #include <boost/iostreams/filter/zlib.hpp>
15 #include <boost/iostreams/filter/bzip2.hpp>
16 #include <boost/iostreams/filtering_stream.hpp>
17 #include <boost/iostreams/device/back_inserter.hpp>
//Boost.Iostreams device (category: source_tag) reading from a std::ifstream
//that it holds by reference together with a reference counter. Each instance
//stores its own position; read() seeks the stream there before reading.
26 typedef char char_type
;
27 typedef boost::iostreams::source_tag category
;
//Unbounded variant: binds the stream and the counter behind _refcnt, records
//the stream's current get position and marks the length as unlimited.
28 file_input(std::ifstream
& _stream
, size_t* _refcnt
)
29 : stream(_stream
), stream_refcnt(*_refcnt
)
32 position
= stream
.tellg();
33 left_unlimited
= true;
//Bounded variant: same binding, but the length is marked as limited.
//NOTE(review): presumably `size` seeds the `left` byte budget used by read() -- confirm.
36 file_input(std::ifstream
& _stream
, uint64_t size
, size_t* _refcnt
)
37 : stream(_stream
), stream_refcnt(*_refcnt
)
40 position
= stream
.tellg();
41 left_unlimited
= false;
//Read callback of the device: s is the destination buffer, n the requested
//byte count. Throws std::runtime_error on seek or read failure.
49 std::streamsize
read(char* s
, std::streamsize n
)
//Go back to where this device last stopped.
52 stream
.seekg(position
, std::ios_base::beg
);
54 throw std::runtime_error("Can't seek ZIP file");
//Bounded device with nothing left.
55 if(!left_unlimited
&& left
== 0)
//Bounded device asked for more than what is left.
57 if(!left_unlimited
&& n
> (int64_t)left
)
//Number of characters the last unformatted read extracted.
60 std::streamsize r
= stream
.gcount();
//Nothing extracted and the stream reports failure: report a read error.
61 if(r
== 0 && stream
.fail())
62 throw std::runtime_error("Can't read compressed data from ZIP file");
//Drop one reference; when the counter reaches zero the counter object itself
//is deleted through its reference.
72 if(!--stream_refcnt
) {
74 delete &stream_refcnt
;
//Copy: refers to the same stream and counter as f and duplicates f's
//position and unlimited flag.
78 file_input(const file_input
& f
)
79 : stream(f
.stream
), stream_refcnt(f
.stream_refcnt
)
82 position
= f
.position
;
83 left_unlimited
= f
.left_unlimited
;
//Underlying stream (not owned by value).
87 std::ifstream
& stream
;
//Reference counter, held by reference.
88 size_t& stream_refcnt
;
//Offset read() seeks to.
89 std::streamoff position
;
//Assignment is declared only.
//NOTE(review): presumably left undefined to forbid assignment -- confirm.
93 file_input
& operator=(const file_input
& f
);
//Boost.Iostreams device (category: sink_tag) that appends everything written
//to it to a std::vector<char> held by reference.
99 typedef char char_type
;
100 typedef boost::iostreams::sink_tag category
;
//Bind the device to the vector that receives the output.
101 vector_output(std::vector
<char>& _stream
)
//Write callback: append the n bytes at s to the end of the vector.
110 std::streamsize
write(const char* s
, std::streamsize n
)
//Grow the vector by n, then copy the new bytes into the freshly added tail.
112 size_t oldsize
= stream
.size();
113 stream
.resize(oldsize
+ n
);
//NOTE(review): with n == 0 this indexes one past the last element -- confirm
//callers never pass an empty write, or guard it.
114 memcpy(&stream
[oldsize
], s
, n
);
//Destination buffer (not owned).
118 std::vector
<char>& stream
;
//Filter core that passes bytes through unchanged while keeping a running
//CRC32 over everything it forwards.
121 class size_and_crc_filter_impl
124 typedef char char_type
;
126 size_and_crc_filter_impl()
//Obtain the initial CRC value from zlib's crc32().
129 crc
= ::crc32(0, NULL
, 0);
//Copy as much of [src_begin, src_end) into [dest_begin, dest_end) as fits,
//updating the checksum over the copied bytes.
136 bool filter(const char*& src_begin
, const char* src_end
, char*& dest_begin
, char* dest_end
,
//Bytes available on the input side.
139 ptrdiff_t amount
= src_end
- src_begin
;
//Flush requested and no input pending.
140 if(flush
&& amount
== 0)
//Never copy more than the output side can hold.
142 if(amount
> dest_end
- dest_begin
)
143 amount
= dest_end
- dest_begin
;
//Checksum exactly the bytes that are about to be forwarded.
145 crc
= ::crc32(crc
, reinterpret_cast<const unsigned char*>(src_begin
), amount
);
146 memcpy(dest_begin
, src_begin
, amount
);
//Advance the output cursor past the copied bytes.
148 dest_begin
+= amount
;
//Boost.Iostreams filter wrapping size_and_crc_filter_impl in
//boost::iostreams::symmetric_filter, using std::allocator<char>.
166 class size_and_crc_filter
: public boost::iostreams::symmetric_filter
<size_and_crc_filter_impl
,
167 std::allocator
<char>>
169 typedef symmetric_filter
<size_and_crc_filter_impl
, std::allocator
<char>> base_type
;
//Re-export the character type and category of the base filter.
171 typedef typename
base_type::char_type char_type
;
172 typedef typename
base_type::category category
;
//bsize: integer size argument.
//NOTE(review): presumably the buffer size forwarded to symmetric_filter -- confirm.
173 size_and_crc_filter(int bsize
)
//Forward to the wrapped implementation's size().
180 return filter().size();
//Forward to the wrapped implementation's crc32().
185 return filter().crc32();
//Header fields of one archive entry, as filled in by parse_member().
189 struct zipfile_member_info
191 bool central_directory_special
; //Central directory, not real member.
//16-bit value read from header offset 4.
192 uint16_t version_needed
;
//16-bit compression method read from header offset 8.
194 uint16_t compression
;
//32-bit sizes read from header offsets 18 and 22.
198 uint32_t compressed_size
;
199 uint32_t uncompressed_size
;
//Member name as stored after the fixed header.
200 std::string filename
;
//Stream offset at which the header starts.
201 uint32_t header_offset
;
//header_offset + 30 + name length + extra field length.
202 uint32_t data_offset
;
//data_offset + compressed_size.
203 uint32_t next_offset
;
//Reads one header at the current get position of `file` and decodes it into a
//zipfile_member_info. Throws std::runtime_error when the header cannot be
//read, is corrupt, or uses a feature that is rejected below.
206 //Parse member starting from current offset.
207 zipfile_member_info
parse_member(std::ifstream
& file
)
209 zipfile_member_info info
;
210 info
.central_directory_special
= false;
//Remember where this header starts.
211 info
.header_offset
= file
.tellg();
212 //The file header is 30 bytes (this could also hit central header, but that's even larger).
213 unsigned char buffer
[30];
214 if(!(file
.read(reinterpret_cast<char*>(buffer
), 30)))
215 throw std::runtime_error("Can't read file header from ZIP file");
//The first four bytes, read as little-endian, select the record type.
216 uint32_t magic
= serialization::u32l(buffer
);
//Central directory magic: flag the record as not being a real member.
217 if(magic
== 0x02014b50) {
218 info
.central_directory_special
= true;
//Otherwise only the file member magic is acceptable.
221 if(magic
!= 0x04034b50)
222 throw std::runtime_error("ZIP archive corrupt: Expected file or central directory magic");
//Decode the little-endian fields at their fixed offsets in the header.
223 info
.version_needed
= serialization::u16l(buffer
+ 4);
224 info
.flags
= serialization::u16l(buffer
+ 6);
225 info
.compression
= serialization::u16l(buffer
+ 8);
226 info
.mtime_time
= serialization::u16l(buffer
+ 10);
227 info
.mtime_day
= serialization::u16l(buffer
+ 12);
228 info
.crc
= serialization::u32l(buffer
+ 14);
229 info
.compressed_size
= serialization::u32l(buffer
+ 18);
230 info
.uncompressed_size
= serialization::u32l(buffer
+ 22);
231 uint16_t filename_len
= serialization::u16l(buffer
+ 26);
232 uint16_t extra_len
= serialization::u16l(buffer
+ 28);
234 throw std::runtime_error("Unsupported ZIP feature: Empty filename not allowed");
//Versions above 20 are rejected, with 46 as the one exception.
235 if(info
.version_needed
> 20 && info
.version_needed
!= 46) {
236 throw std::runtime_error("Unsupported ZIP feature: Only ZIP versions up to 2.0 supported");
//Flag bits 0x0001 and 0x2000 are treated as encryption.
238 if(info
.flags
& 0x2001)
239 throw std::runtime_error("Unsupported ZIP feature: Encryption is not supported");
241 throw std::runtime_error("Unsupported ZIP feature: Indeterminate length not supported");
//Flag bit 0x0020 is treated as binary patching.
242 if(info
.flags
& 0x20)
243 throw std::runtime_error("Unsupported ZIP feature: Binary patching is not supported");
//Only compression methods 0, 8 and 12 are accepted.
244 if(info
.compression
!= 0 && info
.compression
!= 8 && info
.compression
!= 12)
245 throw std::runtime_error("Unsupported ZIP feature: Unsupported compression method");
//A member with method 0 must have equal sizes.
246 if(info
.compression
== 0 && info
.compressed_size
!= info
.uncompressed_size
)
247 throw std::runtime_error("ZIP archive corrupt: csize ≠ usize for stored member");
//The name follows the fixed header; read it through a temporary byte buffer.
248 std::vector
<unsigned char> filename_storage
;
249 filename_storage
.resize(filename_len
);
250 if(!(file
.read(reinterpret_cast<char*>(&filename_storage
[0]), filename_len
)))
251 throw std::runtime_error("Can't read file name from zip file");
252 info
.filename
= std::string(reinterpret_cast<char*>(&filename_storage
[0]), filename_len
);
//Data starts after the fixed header, the name and the extra field.
253 info
.data_offset
= info
.header_offset
+ 30 + filename_len
+ extra_len
;
//The next header starts right after the compressed data.
254 info
.next_offset
= info
.data_offset
+ info
.compressed_size
;
259 bool reader::has_member(const std::string
& name
) throw()
261 return (offsets
.count(name
) > 0);
//Name of the first member in the ordering of the `offsets` index.
264 std::string
reader::find_first() throw(std::bad_alloc
)
//Key of the first index entry.
269 return offsets
.begin()->first
;
//Looks up the member that follows `name` in the ordering of the `offsets`
//index; `name` itself does not have to be a member.
272 std::string
reader::find_next(const std::string
& name
) throw(std::bad_alloc
)
//First entry whose key is strictly greater than name.
274 auto i
= offsets
.upper_bound(name
);
//No such entry: name was at or past the last member.
275 if(i
== offsets
.end())
//Opens member `name` for reading and returns a stream over its contents.
//Throws std::runtime_error if the member does not exist or uses a compression
//method other than 0, 8 or 12.
//NOTE(review): the streams below are created with `new` and handed out by
//reference; nothing here releases them -- confirm who deletes them.
281 std::istream
& reader::operator[](const std::string
& name
) throw(std::bad_alloc
, std::runtime_error
)
283 if(!offsets
.count(name
))
284 throw std::runtime_error("No such file '" + name
+ "' in zip archive");
//Jump to the recorded header offset and decode the header again.
286 zipstream
->seekg(offsets
[name
], std::ios::beg
);
287 zipfile_member_info info
= parse_member(*zipstream
);
//Position the archive stream at the start of the member data.
289 zipstream
->seekg(info
.data_offset
, std::ios::beg
);
//Method 0: expose the data directly, limited to uncompressed_size bytes.
290 if(info
.compression
== 0) {
291 return *new boost::iostreams::stream
<file_input
>(*zipstream
, info
.uncompressed_size
, refcnt
);
//Method 8: zlib decompressor with params.noheader set, fed by a file_input
//limited to compressed_size bytes.
292 } else if(info
.compression
== 8) {
293 boost::iostreams::filtering_istream
* s
= new boost::iostreams::filtering_istream();
294 boost::iostreams::zlib_params params
;
295 params
.noheader
= true;
296 s
->push(boost::iostreams::zlib_decompressor(params
));
297 s
->push(file_input(*zipstream
, info
.compressed_size
, refcnt
));
//Method 12: same arrangement with the bzip2 decompressor.
299 } else if(info
.compression
== 12) {
301 boost::iostreams::filtering_istream
* s
= new boost::iostreams::filtering_istream();
302 s
->push(boost::iostreams::bzip2_decompressor());
303 s
->push(file_input(*zipstream
, info
.compressed_size
, refcnt
));
306 throw std::runtime_error("Unsupported ZIP feature: Unsupported compression method");
309 reader::iterator
reader::begin() throw(std::bad_alloc
)
311 return iterator(offsets
.begin());
314 reader::iterator
reader::end() throw(std::bad_alloc
)
316 return iterator(offsets
.end());
319 reader::riterator
reader::rbegin() throw(std::bad_alloc
)
321 return riterator(offsets
.rbegin());
324 reader::riterator
reader::rend() throw(std::bad_alloc
)
326 return riterator(offsets
.rend());
//Destructor; declared non-throwing.
329 reader::~reader() throw()
//Opens archive `zipfile` and builds the name -> header offset index by parsing
//member headers one after another, starting at offset 0. Throws
//std::runtime_error if the path is not a regular file, cannot be opened,
//cannot be seeked, or contains a header parse_member() rejects.
337 reader::reader(const std::string
& zipfile
) throw(std::bad_alloc
, std::runtime_error
)
339 if(!directory::is_regular(zipfile
))
340 throw std::runtime_error("Zipfile '" + zipfile
+ "' is not regular file");
//The scan starts at the very beginning of the file.
344 zipfile_member_info info
;
345 info
.next_offset
= 0;
346 zipstream
= new std::ifstream
;
347 zipstream
->open(zipfile
.c_str(), std::ios::binary
);
351 throw std::runtime_error("Can't open zipfile '" + zipfile
+ "' for reading");
//Move to the next header and decode it.
354 zipstream
->seekg(info
.next_offset
);
355 if(zipstream
->fail())
356 throw std::runtime_error("Can't seek ZIP file");
357 info
= parse_member(*zipstream
);
//A central directory record is not a real member.
358 if(info
.central_directory_special
)
//Remember where this member's header lives.
360 offsets
[info
.filename
] = info
.header_offset
;
//Reads the first line of member `member` into `out`. With `conditional` set,
//a missing member is checked for up front instead of being opened.
369 bool reader::read_linefile(const std::string
& member
, std::string
& out
, bool conditional
)
370 throw(std::bad_alloc
, std::runtime_error
)
//Optional member that is absent.
372 if(conditional
&& !has_member(member
))
//Open the member through operator[].
374 std::istream
& m
= (*this)[member
];
376 std::getline(m
, out
);
//Reads the whole content of member `member`; the bytes are first collected in
//a local vector.
386 void reader::read_raw_file(const std::string
& member
, std::vector
<char>& out
) throw(std::bad_alloc
,
389 std::vector
<char> _out
;
//Open the member through operator[].
390 std::istream
& m
= (*this)[member
];
//Copy the stream into _out through a back-inserting device.
392 boost::iostreams::back_insert_device
<std::vector
<char>> rd(_out
);
393 boost::iostreams::copy(m
, rd
);
//Creates a writer targeting the file `zipfile`. Output goes to "<zipfile>.tmp";
//commit() later renames that file to the final path. Throws
//std::runtime_error if the temporary file cannot be opened.
402 writer::writer(const std::string
& zipfile
, unsigned _compression
) throw(std::bad_alloc
, std::runtime_error
)
404 compression
= _compression
;
405 zipfile_path
= zipfile
;
//Write next to the target so a failed run leaves the target untouched.
406 temp_path
= zipfile
+ ".tmp";
407 zipstream
= new std::ofstream(temp_path
.c_str(), std::ios::binary
);
409 throw std::runtime_error("Can't open zipfile '" + temp_path
+ "' for writing");
//Marks this writer as backed by its own file stream.
411 system_stream
= true;
//Creates a writer over a caller-supplied output stream.
414 writer::writer(std::ostream
& stream
, unsigned _compression
) throw(std::bad_alloc
, std::runtime_error
)
416 compression
= _compression
;
//Marks this writer as not backed by its own file stream.
419 system_stream
= false;
//Destructor; declared non-throwing.
422 writer::~writer() throw()
//A file-backed writer that was never committed removes its temporary file.
424 if(!committed
&& system_stream
)
425 remove(temp_path
.c_str());
//Finishes the archive: writes one central directory entry per member, then
//the 22-byte end marker, then closes the output file and moves it over the
//final path (the previous file is first renamed to "<zipfile>.backup").
//Throws std::logic_error when called twice or while a member is still open,
//std::runtime_error on write, position or rename failure.
430 void writer::commit() throw(std::bad_alloc
, std::logic_error
, std::runtime_error
)
433 throw std::logic_error("Can't commit twice");
435 throw std::logic_error("Can't commit with file open");
//Scratch buffer reused for every record written below.
436 std::vector
<unsigned char> directory_entry
;
//Running size of the central directory and the offset at which it starts.
437 uint32_t cdirsize
= 0;
438 uint32_t cdiroff
= zipstream
->tellp();
439 if(cdiroff
== (uint32_t)-1)
440 throw std::runtime_error("Can't read current ZIP stream position");
//Bind each (name, info) pair by const reference: iterating by value would
//copy the member name and its record on every pass.
441 for(const auto& i
: files
) {
//Each entry is 46 fixed bytes followed by the member name.
442 cdirsize
+= (46 + i
.first
.length());
443 directory_entry
.resize(46 + i
.first
.length());
//Central directory entry magic (the value parse_member() recognises).
444 serialization::u32l(&directory_entry
[0], 0x02014b50);
445 serialization::u16l(&directory_entry
[4], 3);
446 serialization::u16l(&directory_entry
[6], 20);
447 serialization::u16l(&directory_entry
[8], 0);
//Method 8 when compression is enabled, 0 otherwise.
448 serialization::u16l(&directory_entry
[10], compression
? 8 : 0);
449 serialization::u16l(&directory_entry
[12], 0);
450 serialization::u16l(&directory_entry
[14], 10273);
//Checksum and sizes recorded when the member was closed.
451 serialization::u32l(&directory_entry
[16], i
.second
.crc
);
452 serialization::u32l(&directory_entry
[20], i
.second
.compressed_size
);
453 serialization::u32l(&directory_entry
[24], i
.second
.uncompressed_size
);
//Name length.
454 serialization::u16l(&directory_entry
[28], i
.first
.length());
455 serialization::u16l(&directory_entry
[30], 0);
456 serialization::u16l(&directory_entry
[32], 0);
457 serialization::u16l(&directory_entry
[34], 0);
458 serialization::u16l(&directory_entry
[36], 0);
459 serialization::u32l(&directory_entry
[38], 0);
//Offset of the member's header within the archive.
460 serialization::u32l(&directory_entry
[42], i
.second
.offset
);
//The name goes right after the fixed part.
461 memcpy(&directory_entry
[46], i
.first
.c_str(), i
.first
.length());
462 zipstream
->write(reinterpret_cast<char*>(&directory_entry
[0]), directory_entry
.size());
464 throw std::runtime_error("Failed to write central directory entry to output file");
//End marker: 22 bytes carrying the entry count (twice) plus the size and
//offset of the central directory.
466 directory_entry
.resize(22);
467 serialization::u32l(&directory_entry
[0], 0x06054b50);
468 serialization::u16l(&directory_entry
[4], 0);
469 serialization::u16l(&directory_entry
[6], 0);
470 serialization::u16l(&directory_entry
[8], files
.size());
471 serialization::u16l(&directory_entry
[10], files
.size());
472 serialization::u32l(&directory_entry
[12], cdirsize
);
473 serialization::u32l(&directory_entry
[16], cdiroff
);
474 serialization::u16l(&directory_entry
[20], 0);
475 zipstream
->write(reinterpret_cast<char*>(&directory_entry
[0]), directory_entry
.size());
477 throw std::runtime_error("Failed to write central directory end marker to output file");
//Close the output file before renaming it.
479 dynamic_cast<std::ofstream
*>(zipstream
)->close();
//Keep the previous archive as a backup (the result of this rename is not
//checked), then move the temporary file into place.
480 std::string backup
= zipfile_path
+ ".backup";
481 directory::rename_overwrite(zipfile_path
.c_str(), backup
.c_str());
482 if(directory::rename_overwrite(temp_path
.c_str(), zipfile_path
.c_str()) < 0)
483 throw std::runtime_error("Can't rename '" + temp_path
+ "' -> '" + zipfile_path
+ "'");
//Starts a new archive member called `name` and returns the stream to write
//its content to. Throws std::logic_error if a member is already open and
//std::runtime_error for a rejected member name.
488 std::ostream
& writer::create_file(const std::string
& name
) throw(std::bad_alloc
, std::logic_error
,
492 throw std::logic_error("Can't open file with file open");
494 throw std::runtime_error("Bad member name");
//Start from an empty buffer for this member's output.
495 current_compressed_file
.resize(0);
//Build the output chain: the size/CRC filter comes first.
496 s
= new boost::iostreams::filtering_ostream();
497 s
->push(size_and_crc_filter(4096));
//zlib compressor with params.noheader set.
499 boost::iostreams::zlib_params params
;
500 params
.noheader
= true;
501 s
->push(boost::iostreams::zlib_compressor(params
));
//The chain ends in the in-memory buffer.
503 s
->push(vector_output(current_compressed_file
));
//Finishes the member opened by create_file(): closes the filter chain, then
//writes the 30-byte member header, the member name and the buffered data to
//the archive stream and records the member in `files`.
//Throws std::logic_error if no member is open and std::runtime_error on
//position or write failure.
508 void writer::close_file() throw(std::bad_alloc
, std::logic_error
, std::runtime_error
)
511 throw std::logic_error("Can't close file with no file open");
512 uint32_t ucs
, cs
, crc32
;
//Close the chain so all data reaches the buffer.
513 boost::iostreams::close(*s
);
//The size/CRC filter is the first component of the chain.
514 size_and_crc_filter
& f
= *s
->component
<size_and_crc_filter
>(0);
//Size of the data as stored in the archive.
515 cs
= current_compressed_file
.size();
//Where this member's header begins.
520 base_offset
= zipstream
->tellp();
521 if(base_offset
== (uint32_t)-1)
522 throw std::runtime_error("Can't read current ZIP stream position");
//Header is zero-filled first; only the fields set below are non-zero.
523 unsigned char header
[30];
524 memset(header
, 0, 30);
//File member magic (the value parse_member() expects).
525 serialization::u32l(header
, 0x04034b50);
//Method 8 when compression is enabled, 0 otherwise.
528 header
[8] = compression
? 8 : 0;
//Checksum, stored size, original size and name length.
531 serialization::u32l(header
+ 14, crc32
);
532 serialization::u32l(header
+ 18, cs
);
533 serialization::u32l(header
+ 22, ucs
);
534 serialization::u16l(header
+ 26, open_file
.length());
//Header, then the name, then the buffered member data.
535 zipstream
->write(reinterpret_cast<char*>(header
), 30);
536 zipstream
->write(open_file
.c_str(), open_file
.length());
537 zipstream
->write(&current_compressed_file
[0], current_compressed_file
.size());
539 throw std::runtime_error("Can't write member to ZIP file");
//The buffer is no longer needed.
540 current_compressed_file
.resize(0);
//Record what commit() needs for the central directory.
543 info
.uncompressed_size
= ucs
;
544 info
.compressed_size
= cs
;
545 info
.offset
= base_offset
;
546 files
[open_file
] = info
;
//Writes `value` followed by a line end as member `member`. With `conditional`
//set, an empty value is checked for up front.
550 void writer::write_linefile(const std::string
& member
, const std::string
& value
, bool conditional
)
551 throw(std::bad_alloc
, std::runtime_error
)
//Optional member with nothing to store.
553 if(conditional
&& value
== "")
555 std::ostream
& m
= create_file(member
);
//std::endl also flushes the member stream.
557 m
<< value
<< std::endl
;
//Writes the bytes of `content` as member `member`.
565 void writer::write_raw_file(const std::string
& member
, const std::vector
<char>& content
) throw(std::bad_alloc
,
568 std::ostream
& m
= create_file(member
);
//NOTE(review): &content[0] is indexed even when content is empty -- confirm
//callers never pass an empty vector, or use content.data().
570 m
.write(&content
[0], content
.size());
572 throw std::runtime_error("Can't write ZIP file member");
//Path-syntax helpers: platform separator set, separator test and root test.
//NOTE(review): the short statements missing from the listing this block was
//restored from (#else/#endif, the body of str_index, the early returns of
//isroot) were filled in from context -- confirm against the full file.
#if defined(_WIN32) || defined(_WIN64) || defined(TEST_WIN32_CODE)
//Windows-style paths: both slashes separate components, drive letters exist.
const char* path_splitters = "\\/";
bool drives_allowed = true;
#else
//Assume Unix(-like) system.
const char* path_splitters = "/";
bool drives_allowed = false;
#endif

//Returns a pointer to the first occurrence of ch in the NUL-terminated string
//str, or NULL if there is none. The terminating NUL itself is never matched.
const char* str_index(const char* str, int ch)
{
	for(size_t i = 0; str[i]; i++)
		if(str[i] == ch)
			return str + i;
	return NULL;
}

//True if ch is one of the characters in path_splitters.
bool ispathsep(char ch)
{
	return (str_index(path_splitters, static_cast<int>(static_cast<unsigned char>(ch))) != NULL);
}

//True if ch is an ASCII letter, i.e. usable as a drive letter.
bool is_drive_letter(char ch)
{
	return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
}

//True if path names a filesystem root: a lone separator, or (only when
//drives_allowed) a drive root such as "C:\" or a "\\host\" style prefix.
bool isroot(const std::string& path)
{
	if(path.length() == 1 && ispathsep(path[0]))
		return true;
	if(!drives_allowed)
		//NO more cases for this.
		return false;
	//Drive root: letter, colon, separator. Both letter cases count; the
	//lowercase range used to be compared against '<' and never matched.
	if(path.length() == 3 && is_drive_letter(path[0]) && path[1] == ':' && ispathsep(path[2]))
		return true;
	//Remaining case: two leading separators and a trailing one...
	if(path.length() <= 3 || !ispathsep(path[0]) || !ispathsep(path[1]) ||
		!ispathsep(path[path.length() - 1]))
		return false;
	//...with no other separator in between.
	return (path.find_first_of(path_splitters, 2) == path.length() - 1);
}
//Applies one path component to `path` and returns the resulting path.
//Throws std::runtime_error when ".." is applied to an empty path or a root.
621 std::string
walk(const std::string
& path
, const std::string
& component
)
//Empty and "." components.
623 if(component
== "" || component
== ".")
//".." strips the last component.
626 else if(component
== "..") {
628 if(path
== "" || isroot(path
))
629 throw std::runtime_error("Can't rise to containing directory");
630 std::string _path
= path
;
//Cut at the last separator, if there is one.
631 size_t split
= _path
.find_last_of(path_splitters
);
632 if(split
< _path
.length())
633 return _path
.substr(0, split
);
//Ordinary component: no separator is needed when path is empty or already
//ends in one.
636 } else if(path
== "" || ispathsep(path
[path
.length() - 1]))
637 return path
+ component
;
//Otherwise join with "/".
639 return path
+ "/" + component
;
//Resolves `_name` against the directory part of `_referencing_path`, walking
//the components of the name one at a time through walk().
642 std::string
combine_path(const std::string
& _name
, const std::string
& _referencing_path
)
644 std::string name
= _name
;
645 std::string referencing_path
= _referencing_path
;
//Drop everything from the last separator on, if there is one.
646 size_t x
= referencing_path
.find_last_of(path_splitters
);
647 if(x
< referencing_path
.length())
648 referencing_path
= referencing_path
.substr(0, x
);
651 //Check if name is absolute.
652 if(ispathsep(name
[0]))
//Drive-letter form (letter, colon, separator), only where drives exist.
654 if(drives_allowed
&& name
.length() >= 3 && ((name
[0] >= 'A' && name
[0] <= 'Z') || (name
[0] >= 'a' &&
655 name
[0] <= 'z')) && name
[1] == ':' && ispathsep(name
[2]))
657 //It is not absolute.
658 std::string path
= referencing_path
;
//Find the end of the component that starts at pindex.
661 size_t split
= name
.find_first_of(path_splitters
, pindex
);
//Component up to the separator, or the rest of the name if none remains.
663 if(split
< name
.length())
664 c
= name
.substr(pindex
, split
- pindex
);
666 c
= name
.substr(pindex
);
//Apply it to the path built so far.
667 path
= walk(path
, c
);
668 if(split
< name
.length())
673 //If path becomes empty, assume it means current directory.
//Returns combine_path(name, referencing_path).
680 std::string
resolverel(const std::string
& name
, const std::string
& referencing_path
) throw(std::bad_alloc
,
683 return combine_path(name
, referencing_path
);
//Opens `name` resolved against `referencing_path`. The resolved path is first
//tried as a regular file; failing that, trailing components are moved one by
//one into a member name and the shortened path is tried as a ZIP archive.
//Throws std::runtime_error when no separator is left to split on.
686 std::istream
& openrel(const std::string
& name
, const std::string
& referencing_path
) throw(std::bad_alloc
,
689 std::string path_to_open
= combine_path(name
, referencing_path
);
//Kept unmodified for the error message.
690 std::string final_path
= path_to_open
;
691 //Try to open this from the main OS filesystem.
692 if(directory::is_regular(path_to_open
)) {
693 std::ifstream
* i
= new std::ifstream(path_to_open
.c_str(), std::ios::binary
);
699 //Didn't succeed. Try to open as ZIP archive.
700 std::string membername
;
//Only "/" is used as the split character here.
702 size_t split
= path_to_open
.find_last_of("/");
703 if(split
>= path_to_open
.length())
704 throw std::runtime_error("Can't open '" + final_path
+ "'");
705 //Move a component to member name.
707 membername
= path_to_open
.substr(split
+ 1) + "/" + membername
;
709 membername
= path_to_open
.substr(split
+ 1);
710 path_to_open
= path_to_open
.substr(0, split
);
//If the shortened path is a regular file, try it as an archive.
711 if(directory::is_regular(path_to_open
))
//The reader is a local object; the returned stream comes from its operator[].
713 reader
r(path_to_open
);
714 return r
[membername
];
715 } catch(std::bad_alloc
& e
) {
717 } catch(std::runtime_error
& e
) {
//Opens `name` relative to `referencing_path` through openrel() and copies the
//stream's content into a byte vector.
722 std::vector
<char> readrel(const std::string
& name
, const std::string
& referencing_path
) throw(std::bad_alloc
,
725 std::vector
<char> out
;
726 std::istream
& s
= openrel(name
, referencing_path
);
//Copy the stream into `out` through a back-inserting device.
727 boost::iostreams::back_insert_device
<std::vector
<char>> rd(out
);
728 boost::iostreams::copy(s
, rd
);
733 bool file_exists(const std::string
& name
) throw(std::bad_alloc
)
735 std::string path_to_open
= name
;
736 std::string final_path
= path_to_open
;
737 if(directory::is_regular(path_to_open
))
739 //Didn't succeed. Try to open as ZIP archive.
740 std::string membername
;
742 size_t split
= path_to_open
.find_last_of("/");
743 if(split
>= path_to_open
.length())
745 //Move a component to member name.
747 membername
= path_to_open
.substr(split
+ 1) + "/" + membername
;
749 membername
= path_to_open
.substr(split
+ 1);
750 path_to_open
= path_to_open
.substr(0, split
);
751 if(directory::is_regular(path_to_open
))
753 reader
r(path_to_open
);
754 return r
.has_member(membername
);
755 } catch(std::bad_alloc
& e
) {
757 } catch(std::runtime_error
& e
) {