lsnes rr2-β24
[lsnes.git] / src / library / zip.cpp
blobc5c5776b545f23e7b543d01945f6d27e1efef474
1 #include "zip.hpp"
2 #include "directory.hpp"
3 #include "serialization.hpp"
5 #include <cstdint>
6 #include <cstring>
7 #include <iostream>
8 #include <stdexcept>
9 #include <boost/iostreams/categories.hpp>
10 #include <boost/iostreams/copy.hpp>
11 #include <boost/iostreams/stream.hpp>
12 #include <boost/iostreams/stream_buffer.hpp>
13 #include <boost/iostreams/filter/symmetric.hpp>
14 #include <boost/iostreams/filter/zlib.hpp>
15 #include <boost/iostreams/filter/bzip2.hpp>
16 #include <boost/iostreams/filtering_stream.hpp>
17 #include <boost/iostreams/device/back_inserter.hpp>
19 namespace zip
21 namespace
23 class file_input
25 public:
26 typedef char char_type;
27 typedef boost::iostreams::source_tag category;
28 file_input(std::ifstream& _stream, size_t* _refcnt)
29 : stream(_stream), stream_refcnt(*_refcnt)
31 stream_refcnt++;
32 position = stream.tellg();
33 left_unlimited = true;
36 file_input(std::ifstream& _stream, uint64_t size, size_t* _refcnt)
37 : stream(_stream), stream_refcnt(*_refcnt)
39 stream_refcnt++;
40 position = stream.tellg();
41 left_unlimited = false;
42 left = size;
45 void close()
49 std::streamsize read(char* s, std::streamsize n)
51 stream.clear();
52 stream.seekg(position, std::ios_base::beg);
53 if(stream.fail())
54 throw std::runtime_error("Can't seek ZIP file");
55 if(!left_unlimited && left == 0)
56 return -1;
57 if(!left_unlimited && n > (int64_t)left)
58 n = left;
59 stream.read(s, n);
60 std::streamsize r = stream.gcount();
61 if(r == 0 && stream.fail())
62 throw std::runtime_error("Can't read compressed data from ZIP file");
63 if(!stream && r == 0)
64 return -1;
65 position += r;
66 left -= r;
67 return r;
70 ~file_input()
72 if(!--stream_refcnt) {
73 delete &stream;
74 delete &stream_refcnt;
78 file_input(const file_input& f)
79 : stream(f.stream), stream_refcnt(f.stream_refcnt)
81 stream_refcnt++;
82 position = f.position;
83 left_unlimited = f.left_unlimited;
84 left = f.left;
86 protected:
87 std::ifstream& stream;
88 size_t& stream_refcnt;
89 std::streamoff position;
90 bool left_unlimited;
91 uint64_t left;
92 private:
93 file_input& operator=(const file_input& f);
96 class vector_output
98 public:
99 typedef char char_type;
100 typedef boost::iostreams::sink_tag category;
101 vector_output(std::vector<char>& _stream)
102 : stream(_stream)
106 void close()
110 std::streamsize write(const char* s, std::streamsize n)
112 size_t oldsize = stream.size();
113 stream.resize(oldsize + n);
114 memcpy(&stream[oldsize], s, n);
115 return n;
117 protected:
118 std::vector<char>& stream;
121 class size_and_crc_filter_impl
123 public:
124 typedef char char_type;
126 size_and_crc_filter_impl()
128 dsize = 0;
129 crc = ::crc32(0, NULL, 0);
132 void close()
136 bool filter(const char*& src_begin, const char* src_end, char*& dest_begin, char* dest_end,
137 bool flush)
139 ptrdiff_t amount = src_end - src_begin;
140 if(flush && amount == 0)
141 return false;
142 if(amount > dest_end - dest_begin)
143 amount = dest_end - dest_begin;
144 dsize += amount;
145 crc = ::crc32(crc, reinterpret_cast<const unsigned char*>(src_begin), amount);
146 memcpy(dest_begin, src_begin, amount);
147 src_begin += amount;
148 dest_begin += amount;
149 return true;
152 uint32_t size()
154 return dsize;
157 uint32_t crc32()
159 return crc;
161 private:
162 uint32_t dsize;
163 uint32_t crc;
166 class size_and_crc_filter : public boost::iostreams::symmetric_filter<size_and_crc_filter_impl,
167 std::allocator<char>>
169 typedef symmetric_filter<size_and_crc_filter_impl, std::allocator<char>> base_type;
170 public:
171 typedef typename base_type::char_type char_type;
172 typedef typename base_type::category category;
173 size_and_crc_filter(int bsize)
174 : base_type(bsize)
178 uint32_t size()
180 return filter().size();
183 uint32_t crc32()
185 return filter().crc32();
//Decoded local file header of one ZIP member, plus the offsets derived from it.
//All fields have defined defaults, so a partially filled record (as returned for a central directory
//record) can be copied safely.
struct zipfile_member_info
{
	bool central_directory_special = false; //Central directory, not real member.
	uint16_t version_needed = 0;
	uint16_t flags = 0;
	uint16_t compression = 0;
	uint16_t mtime_time = 0;
	uint16_t mtime_day = 0;
	uint32_t crc = 0;
	uint32_t compressed_size = 0;
	uint32_t uncompressed_size = 0;
	std::string filename;
	uint32_t header_offset = 0;	//Offset of the local header in the archive.
	uint32_t data_offset = 0;	//Offset of the first byte of member data.
	uint32_t next_offset = 0;	//Offset just past the member data (where the next header starts).
};
206 //Parse member starting from current offset.
207 zipfile_member_info parse_member(std::ifstream& file)
209 zipfile_member_info info;
210 info.central_directory_special = false;
211 info.header_offset = file.tellg();
212 //The file header is 30 bytes (this could also hit central header, but that's even larger).
213 unsigned char buffer[30];
214 if(!(file.read(reinterpret_cast<char*>(buffer), 30)))
215 throw std::runtime_error("Can't read file header from ZIP file");
216 uint32_t magic = serialization::u32l(buffer);
217 if(magic == 0x02014b50) {
218 info.central_directory_special = true;
219 return info;
221 if(magic != 0x04034b50)
222 throw std::runtime_error("ZIP archive corrupt: Expected file or central directory magic");
223 info.version_needed = serialization::u16l(buffer + 4);
224 info.flags = serialization::u16l(buffer + 6);
225 info.compression = serialization::u16l(buffer + 8);
226 info.mtime_time = serialization::u16l(buffer + 10);
227 info.mtime_day = serialization::u16l(buffer + 12);
228 info.crc = serialization::u32l(buffer + 14);
229 info.compressed_size = serialization::u32l(buffer + 18);
230 info.uncompressed_size = serialization::u32l(buffer + 22);
231 uint16_t filename_len = serialization::u16l(buffer + 26);
232 uint16_t extra_len = serialization::u16l(buffer + 28);
233 if(!filename_len)
234 throw std::runtime_error("Unsupported ZIP feature: Empty filename not allowed");
235 if(info.version_needed > 20 && info.version_needed != 46) {
236 throw std::runtime_error("Unsupported ZIP feature: Only ZIP versions up to 2.0 supported");
238 if(info.flags & 0x2001)
239 throw std::runtime_error("Unsupported ZIP feature: Encryption is not supported");
240 if(info.flags & 0x8)
241 throw std::runtime_error("Unsupported ZIP feature: Indeterminate length not supported");
242 if(info.flags & 0x20)
243 throw std::runtime_error("Unsupported ZIP feature: Binary patching is not supported");
244 if(info.compression != 0 && info.compression != 8 && info.compression != 12)
245 throw std::runtime_error("Unsupported ZIP feature: Unsupported compression method");
246 if(info.compression == 0 && info.compressed_size != info.uncompressed_size)
247 throw std::runtime_error("ZIP archive corrupt: csize ≠ usize for stored member");
248 std::vector<unsigned char> filename_storage;
249 filename_storage.resize(filename_len);
250 if(!(file.read(reinterpret_cast<char*>(&filename_storage[0]), filename_len)))
251 throw std::runtime_error("Can't read file name from zip file");
252 info.filename = std::string(reinterpret_cast<char*>(&filename_storage[0]), filename_len);
253 info.data_offset = info.header_offset + 30 + filename_len + extra_len;
254 info.next_offset = info.data_offset + info.compressed_size;
255 return info;
259 bool reader::has_member(const std::string& name) throw()
261 return (offsets.count(name) > 0);
264 std::string reader::find_first() throw(std::bad_alloc)
266 if(offsets.empty())
267 return "";
268 else
269 return offsets.begin()->first;
272 std::string reader::find_next(const std::string& name) throw(std::bad_alloc)
274 auto i = offsets.upper_bound(name);
275 if(i == offsets.end())
276 return "";
277 else
278 return i->first;
281 std::istream& reader::operator[](const std::string& name) throw(std::bad_alloc, std::runtime_error)
283 if(!offsets.count(name))
284 throw std::runtime_error("No such file '" + name + "' in zip archive");
285 zipstream->clear();
286 zipstream->seekg(offsets[name], std::ios::beg);
287 zipfile_member_info info = parse_member(*zipstream);
288 zipstream->clear();
289 zipstream->seekg(info.data_offset, std::ios::beg);
290 if(info.compression == 0) {
291 return *new boost::iostreams::stream<file_input>(*zipstream, info.uncompressed_size, refcnt);
292 } else if(info.compression == 8) {
293 boost::iostreams::filtering_istream* s = new boost::iostreams::filtering_istream();
294 boost::iostreams::zlib_params params;
295 params.noheader = true;
296 s->push(boost::iostreams::zlib_decompressor(params));
297 s->push(file_input(*zipstream, info.compressed_size, refcnt));
298 return *s;
299 } else if(info.compression == 12) {
300 //Bzip2 compression.
301 boost::iostreams::filtering_istream* s = new boost::iostreams::filtering_istream();
302 s->push(boost::iostreams::bzip2_decompressor());
303 s->push(file_input(*zipstream, info.compressed_size, refcnt));
304 return *s;
305 } else
306 throw std::runtime_error("Unsupported ZIP feature: Unsupported compression method");
309 reader::iterator reader::begin() throw(std::bad_alloc)
311 return iterator(offsets.begin());
314 reader::iterator reader::end() throw(std::bad_alloc)
316 return iterator(offsets.end());
319 reader::riterator reader::rbegin() throw(std::bad_alloc)
321 return riterator(offsets.rbegin());
324 reader::riterator reader::rend() throw(std::bad_alloc)
326 return riterator(offsets.rend());
329 reader::~reader() throw()
331 if(!--*refcnt) {
332 delete zipstream;
333 delete refcnt;
337 reader::reader(const std::string& zipfile) throw(std::bad_alloc, std::runtime_error)
339 if(!directory::is_regular(zipfile))
340 throw std::runtime_error("Zipfile '" + zipfile + "' is not regular file");
341 zipstream = NULL;
342 refcnt = NULL;
343 try {
344 zipfile_member_info info;
345 info.next_offset = 0;
346 zipstream = new std::ifstream;
347 zipstream->open(zipfile.c_str(), std::ios::binary);
348 refcnt = new size_t;
349 *refcnt = 1;
350 if(!*zipstream)
351 throw std::runtime_error("Can't open zipfile '" + zipfile + "' for reading");
352 do {
353 zipstream->clear();
354 zipstream->seekg(info.next_offset);
355 if(zipstream->fail())
356 throw std::runtime_error("Can't seek ZIP file");
357 info = parse_member(*zipstream);
358 if(info.central_directory_special)
359 break;
360 offsets[info.filename] = info.header_offset;
361 } while(1);
362 } catch(...) {
363 delete zipstream;
364 delete refcnt;
365 throw;
369 bool reader::read_linefile(const std::string& member, std::string& out, bool conditional)
370 throw(std::bad_alloc, std::runtime_error)
372 if(conditional && !has_member(member))
373 return false;
374 std::istream& m = (*this)[member];
375 try {
376 std::getline(m, out);
377 istrip_CR(out);
378 delete &m;
379 } catch(...) {
380 delete &m;
381 throw;
383 return true;
386 void reader::read_raw_file(const std::string& member, std::vector<char>& out) throw(std::bad_alloc,
387 std::runtime_error)
389 std::vector<char> _out;
390 std::istream& m = (*this)[member];
391 try {
392 boost::iostreams::back_insert_device<std::vector<char>> rd(_out);
393 boost::iostreams::copy(m, rd);
394 delete &m;
395 } catch(...) {
396 delete &m;
397 throw;
399 out = _out;
402 writer::writer(const std::string& zipfile, unsigned _compression) throw(std::bad_alloc, std::runtime_error)
404 compression = _compression;
405 zipfile_path = zipfile;
406 temp_path = zipfile + ".tmp";
407 zipstream = new std::ofstream(temp_path.c_str(), std::ios::binary);
408 if(!*zipstream)
409 throw std::runtime_error("Can't open zipfile '" + temp_path + "' for writing");
410 committed = false;
411 system_stream = true;
414 writer::writer(std::ostream& stream, unsigned _compression) throw(std::bad_alloc, std::runtime_error)
416 compression = _compression;
417 zipstream = &stream;
418 committed = false;
419 system_stream = false;
422 writer::~writer() throw()
424 if(!committed && system_stream)
425 remove(temp_path.c_str());
426 if(system_stream)
427 delete zipstream;
430 void writer::commit() throw(std::bad_alloc, std::logic_error, std::runtime_error)
432 if(committed)
433 throw std::logic_error("Can't commit twice");
434 if(open_file != "")
435 throw std::logic_error("Can't commit with file open");
436 std::vector<unsigned char> directory_entry;
437 uint32_t cdirsize = 0;
438 uint32_t cdiroff = zipstream->tellp();
439 if(cdiroff == (uint32_t)-1)
440 throw std::runtime_error("Can't read current ZIP stream position");
441 for(auto i : files) {
442 cdirsize += (46 + i.first.length());
443 directory_entry.resize(46 + i.first.length());
444 serialization::u32l(&directory_entry[0], 0x02014b50);
445 serialization::u16l(&directory_entry[4], 3);
446 serialization::u16l(&directory_entry[6], 20);
447 serialization::u16l(&directory_entry[8], 0);
448 serialization::u16l(&directory_entry[10], compression ? 8 : 0);
449 serialization::u16l(&directory_entry[12], 0);
450 serialization::u16l(&directory_entry[14], 10273);
451 serialization::u32l(&directory_entry[16], i.second.crc);
452 serialization::u32l(&directory_entry[20], i.second.compressed_size);
453 serialization::u32l(&directory_entry[24], i.second.uncompressed_size);
454 serialization::u16l(&directory_entry[28], i.first.length());
455 serialization::u16l(&directory_entry[30], 0);
456 serialization::u16l(&directory_entry[32], 0);
457 serialization::u16l(&directory_entry[34], 0);
458 serialization::u16l(&directory_entry[36], 0);
459 serialization::u32l(&directory_entry[38], 0);
460 serialization::u32l(&directory_entry[42], i.second.offset);
461 memcpy(&directory_entry[46], i.first.c_str(), i.first.length());
462 zipstream->write(reinterpret_cast<char*>(&directory_entry[0]), directory_entry.size());
463 if(!*zipstream)
464 throw std::runtime_error("Failed to write central directory entry to output file");
466 directory_entry.resize(22);
467 serialization::u32l(&directory_entry[0], 0x06054b50);
468 serialization::u16l(&directory_entry[4], 0);
469 serialization::u16l(&directory_entry[6], 0);
470 serialization::u16l(&directory_entry[8], files.size());
471 serialization::u16l(&directory_entry[10], files.size());
472 serialization::u32l(&directory_entry[12], cdirsize);
473 serialization::u32l(&directory_entry[16], cdiroff);
474 serialization::u16l(&directory_entry[20], 0);
475 zipstream->write(reinterpret_cast<char*>(&directory_entry[0]), directory_entry.size());
476 if(!*zipstream)
477 throw std::runtime_error("Failed to write central directory end marker to output file");
478 if(system_stream) {
479 dynamic_cast<std::ofstream*>(zipstream)->close();
480 std::string backup = zipfile_path + ".backup";
481 directory::rename_overwrite(zipfile_path.c_str(), backup.c_str());
482 if(directory::rename_overwrite(temp_path.c_str(), zipfile_path.c_str()) < 0)
483 throw std::runtime_error("Can't rename '" + temp_path + "' -> '" + zipfile_path + "'");
485 committed = true;
488 std::ostream& writer::create_file(const std::string& name) throw(std::bad_alloc, std::logic_error,
489 std::runtime_error)
491 if(open_file != "")
492 throw std::logic_error("Can't open file with file open");
493 if(name == "")
494 throw std::runtime_error("Bad member name");
495 current_compressed_file.resize(0);
496 s = new boost::iostreams::filtering_ostream();
497 s->push(size_and_crc_filter(4096));
498 if(compression) {
499 boost::iostreams::zlib_params params;
500 params.noheader = true;
501 s->push(boost::iostreams::zlib_compressor(params));
503 s->push(vector_output(current_compressed_file));
504 open_file = name;
505 return *s;
508 void writer::close_file() throw(std::bad_alloc, std::logic_error, std::runtime_error)
510 if(open_file == "")
511 throw std::logic_error("Can't close file with no file open");
512 uint32_t ucs, cs, crc32;
513 boost::iostreams::close(*s);
514 size_and_crc_filter& f = *s->component<size_and_crc_filter>(0);
515 cs = current_compressed_file.size();
516 ucs = f.size();
517 crc32 = f.crc32();
518 delete s;
520 base_offset = zipstream->tellp();
521 if(base_offset == (uint32_t)-1)
522 throw std::runtime_error("Can't read current ZIP stream position");
523 unsigned char header[30];
524 memset(header, 0, 30);
525 serialization::u32l(header, 0x04034b50);
526 header[4] = 20;
527 header[6] = 0;
528 header[8] = compression ? 8 : 0;
529 header[12] = 33;
530 header[13] = 40;
531 serialization::u32l(header + 14, crc32);
532 serialization::u32l(header + 18, cs);
533 serialization::u32l(header + 22, ucs);
534 serialization::u16l(header + 26, open_file.length());
535 zipstream->write(reinterpret_cast<char*>(header), 30);
536 zipstream->write(open_file.c_str(), open_file.length());
537 zipstream->write(&current_compressed_file[0], current_compressed_file.size());
538 if(!*zipstream)
539 throw std::runtime_error("Can't write member to ZIP file");
540 current_compressed_file.resize(0);
541 file_info info;
542 info.crc = crc32;
543 info.uncompressed_size = ucs;
544 info.compressed_size = cs;
545 info.offset = base_offset;
546 files[open_file] = info;
547 open_file = "";
550 void writer::write_linefile(const std::string& member, const std::string& value, bool conditional)
551 throw(std::bad_alloc, std::runtime_error)
553 if(conditional && value == "")
554 return;
555 std::ostream& m = create_file(member);
556 try {
557 m << value << std::endl;
558 close_file();
559 } catch(...) {
560 close_file();
561 throw;
565 void writer::write_raw_file(const std::string& member, const std::vector<char>& content) throw(std::bad_alloc,
566 std::runtime_error)
568 std::ostream& m = create_file(member);
569 try {
570 m.write(&content[0], content.size());
571 if(!m)
572 throw std::runtime_error("Can't write ZIP file member");
573 close_file();
574 } catch(...) {
575 close_file();
576 throw;
namespace
{
#if defined(_WIN32) || defined(_WIN64) || defined(TEST_WIN32_CODE)
const char* path_splitters = "\\/";
bool drives_allowed = true;
#else
//Assume Unix(-like) system.
const char* path_splitters = "/";
bool drives_allowed = false;
#endif

//Find the first occurrence of ch in str. Returns a pointer to it, or a null pointer if there is none.
//Unlike strchr(), the terminating NUL is never matched, so looking up '\0' reports "not found".
const char* str_index(const char* str, int ch)
{
	for(size_t i = 0; str[i]; i++)
		if(str[i] == ch)
			return str + i;
	return nullptr;
}

//Is ch one of the path separator characters of this platform?
bool ispathsep(char ch)
{
	return (str_index(path_splitters, static_cast<int>(static_cast<unsigned char>(ch))) != nullptr);
}

//Is ch an ASCII letter, i.e. something that can name a drive?
bool is_drive_letter(char ch)
{
	return ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'));
}

//Is path a filesystem root? A lone separator always is. Where drives are allowed, so are "X:<sep>"
//and UNC-style "<sep><sep>name<sep>" with no further separator inside the name.
bool isroot(const std::string& path)
{
	if(path.length() == 1 && ispathsep(path[0]))
		return true;
	if(!drives_allowed)
		//NO more cases for this.
		return false;
	if(path.length() == 3 && is_drive_letter(path[0]) && path[1] == ':' && ispathsep(path[2]))
		return true;
	//UNC.
	if(path.length() <= 3 || !ispathsep(path[0]) || !ispathsep(path[1]) ||
		!ispathsep(path[path.length() - 1]))
		return false;
	return (path.find_first_of(path_splitters, 2) == path.length() - 1);
}

//Apply one path component to path and return the result.
//Empty and "." components leave path alone, ".." strips the last component (throwing
//std::runtime_error if path is empty or a root), anything else is appended, with a '/' in between
//unless path is empty or already ends in a separator.
std::string walk(const std::string& path, const std::string& component)
{
	if(component == "" || component == ".")
		//Current directory.
		return path;
	if(component == "..") {
		//Parent directory.
		if(path == "" || isroot(path))
			throw std::runtime_error("Can't rise to containing directory");
		size_t split = path.find_last_of(path_splitters);
		if(split < path.length())
			return path.substr(0, split);
		//No separator left: the parent is the (implicit) current directory.
		return "";
	}
	if(path == "" || ispathsep(path[path.length() - 1]))
		return path + component;
	return path + "/" + component;
}

//Resolve _name relative to the directory part of _referencing_path.
//If _referencing_path has no directory part, or _name is absolute, _name is returned unchanged.
//Otherwise the components of _name are applied one by one with walk(); an empty result becomes ".".
//Throws std::runtime_error if ".." components try to go above the starting directory.
std::string combine_path(const std::string& _name, const std::string& _referencing_path)
{
	const std::string& name = _name;
	size_t x = _referencing_path.find_last_of(path_splitters);
	if(x >= _referencing_path.length())
		return name;
	//Check if name is absolute. (For an empty name, name[0] is the NUL terminator, not a separator.)
	if(ispathsep(name[0]))
		return name;
	if(drives_allowed && name.length() >= 3 && is_drive_letter(name[0]) && name[1] == ':' &&
		ispathsep(name[2]))
		return name;
	//It is not absolute. Start from the directory of the referencing path.
	std::string path = _referencing_path.substr(0, x);
	size_t pindex = 0;
	while(true) {
		size_t split = name.find_first_of(path_splitters, pindex);
		bool last = (split >= name.length());
		path = walk(path, last ? name.substr(pindex) : name.substr(pindex, split - pindex));
		if(last)
			break;
		pindex = split + 1;
	}
	//If path becomes empty, assume it means current directory.
	if(path == "")
		path = ".";
	return path;
}
}
680 std::string resolverel(const std::string& name, const std::string& referencing_path) throw(std::bad_alloc,
681 std::runtime_error)
683 return combine_path(name, referencing_path);
686 std::istream& openrel(const std::string& name, const std::string& referencing_path) throw(std::bad_alloc,
687 std::runtime_error)
689 std::string path_to_open = combine_path(name, referencing_path);
690 std::string final_path = path_to_open;
691 //Try to open this from the main OS filesystem.
692 if(directory::is_regular(path_to_open)) {
693 std::ifstream* i = new std::ifstream(path_to_open.c_str(), std::ios::binary);
694 if(i->is_open()) {
695 return *i;
697 delete i;
699 //Didn't succeed. Try to open as ZIP archive.
700 std::string membername;
701 while(true) {
702 size_t split = path_to_open.find_last_of("/");
703 if(split >= path_to_open.length())
704 throw std::runtime_error("Can't open '" + final_path + "'");
705 //Move a component to member name.
706 if(membername != "")
707 membername = path_to_open.substr(split + 1) + "/" + membername;
708 else
709 membername = path_to_open.substr(split + 1);
710 path_to_open = path_to_open.substr(0, split);
711 if(directory::is_regular(path_to_open))
712 try {
713 reader r(path_to_open);
714 return r[membername];
715 } catch(std::bad_alloc& e) {
716 throw;
717 } catch(std::runtime_error& e) {
722 std::vector<char> readrel(const std::string& name, const std::string& referencing_path) throw(std::bad_alloc,
723 std::runtime_error)
725 std::vector<char> out;
726 std::istream& s = openrel(name, referencing_path);
727 boost::iostreams::back_insert_device<std::vector<char>> rd(out);
728 boost::iostreams::copy(s, rd);
729 delete &s;
730 return out;
733 bool file_exists(const std::string& name) throw(std::bad_alloc)
735 std::string path_to_open = name;
736 std::string final_path = path_to_open;
737 if(directory::is_regular(path_to_open))
738 return true;
739 //Didn't succeed. Try to open as ZIP archive.
740 std::string membername;
741 while(true) {
742 size_t split = path_to_open.find_last_of("/");
743 if(split >= path_to_open.length())
744 return false;
745 //Move a component to member name.
746 if(membername != "")
747 membername = path_to_open.substr(split + 1) + "/" + membername;
748 else
749 membername = path_to_open.substr(split + 1);
750 path_to_open = path_to_open.substr(0, split);
751 if(directory::is_regular(path_to_open))
752 try {
753 reader r(path_to_open);
754 return r.has_member(membername);
755 } catch(std::bad_alloc& e) {
756 throw;
757 } catch(std::runtime_error& e) {
760 return false;