Refactor some generic reading/writing routines out of moviefile.cpp
[lsnes.git] / src / library / zip.cpp
blobae45ef83b9a18d74c36538b5bcb930b396c080df
1 #include "zip.hpp"
2 #include "directory.hpp"
3 #include "serialization.hpp"
5 #include <cstdint>
6 #include <cstring>
7 #include <iostream>
8 #include <stdexcept>
9 #include <boost/iostreams/categories.hpp>
10 #include <boost/iostreams/copy.hpp>
11 #include <boost/iostreams/stream.hpp>
12 #include <boost/iostreams/stream_buffer.hpp>
13 #include <boost/iostreams/filter/symmetric.hpp>
14 #include <boost/iostreams/filter/zlib.hpp>
15 #include <boost/iostreams/filtering_stream.hpp>
16 #include <boost/iostreams/device/back_inserter.hpp>
17 #if defined(_WIN32) || defined(_WIN64) || defined(TEST_WIN32_CODE)
18 #include <windows.h>
19 #endif
21 namespace zip
//Rename oldname to newname, replacing newname if it already exists.
//
//Returns 0 on success. On failure returns -1 on the Windows path and whatever rename() reports on
//other systems.
int rename_overwrite(const char* oldname, const char* newname)
{
#if defined(_WIN32) || defined(_WIN64) || defined(TEST_WIN32_CODE)
	//MoveFileEx() signals success with a nonzero value; map that to the 0 / -1 convention.
	if(MoveFileEx(oldname, newname, MOVEFILE_REPLACE_EXISTING))
		return 0;
	return -1;
#else
	int status = rename(oldname, newname);
	return status;
#endif
}
32 namespace
34 class file_input
36 public:
37 typedef char char_type;
38 typedef boost::iostreams::source_tag category;
39 file_input(std::ifstream& _stream, size_t* _refcnt)
40 : stream(_stream), stream_refcnt(*_refcnt)
42 stream_refcnt++;
43 position = stream.tellg();
44 left_unlimited = true;
47 file_input(std::ifstream& _stream, uint32_t size, size_t* _refcnt)
48 : stream(_stream), stream_refcnt(*_refcnt)
50 stream_refcnt++;
51 position = stream.tellg();
52 left_unlimited = false;
53 left = size;
56 void close()
60 std::streamsize read(char* s, std::streamsize n)
62 stream.clear();
63 stream.seekg(position, std::ios_base::beg);
64 if(stream.fail())
65 throw std::runtime_error("Can't seek ZIP file");
66 if(!left_unlimited && left == 0)
67 return -1;
68 if(!left_unlimited && n > left)
69 n = left;
70 stream.read(s, n);
71 std::streamsize r = stream.gcount();
72 if(r == 0 && stream.fail())
73 throw std::runtime_error("Can't read compressed data from ZIP file");
74 if(!stream && r == 0)
75 return -1;
76 position += r;
77 left -= r;
78 return r;
81 ~file_input()
83 if(!--stream_refcnt) {
84 delete &stream;
85 delete &stream_refcnt;
89 file_input(const file_input& f)
90 : stream(f.stream), stream_refcnt(f.stream_refcnt)
92 stream_refcnt++;
93 position = f.position;
94 left_unlimited = f.left_unlimited;
95 left = f.left;
97 protected:
98 std::ifstream& stream;
99 size_t& stream_refcnt;
100 std::streamoff position;
101 bool left_unlimited;
102 uint32_t left;
103 private:
104 file_input& operator=(const file_input& f);
107 class vector_output
109 public:
110 typedef char char_type;
111 typedef boost::iostreams::sink_tag category;
112 vector_output(std::vector<char>& _stream)
113 : stream(_stream)
117 void close()
121 std::streamsize write(const char* s, std::streamsize n)
123 size_t oldsize = stream.size();
124 stream.resize(oldsize + n);
125 memcpy(&stream[oldsize], s, n);
126 return n;
128 protected:
129 std::vector<char>& stream;
132 class size_and_crc_filter_impl
134 public:
135 typedef char char_type;
137 size_and_crc_filter_impl()
139 dsize = 0;
140 crc = ::crc32(0, NULL, 0);
143 void close()
147 bool filter(const char*& src_begin, const char* src_end, char*& dest_begin, char* dest_end,
148 bool flush)
150 ptrdiff_t amount = src_end - src_begin;
151 if(flush && amount == 0)
152 return false;
153 if(amount > dest_end - dest_begin)
154 amount = dest_end - dest_begin;
155 dsize += amount;
156 crc = ::crc32(crc, reinterpret_cast<const unsigned char*>(src_begin), amount);
157 memcpy(dest_begin, src_begin, amount);
158 src_begin += amount;
159 dest_begin += amount;
160 return true;
163 uint32_t size()
165 return dsize;
168 uint32_t crc32()
170 return crc;
172 private:
173 uint32_t dsize;
174 uint32_t crc;
177 class size_and_crc_filter : public boost::iostreams::symmetric_filter<size_and_crc_filter_impl,
178 std::allocator<char>>
180 typedef symmetric_filter<size_and_crc_filter_impl, std::allocator<char>> base_type;
181 public:
182 typedef typename base_type::char_type char_type;
183 typedef typename base_type::category category;
184 size_and_crc_filter(int bsize)
185 : base_type(bsize)
189 uint32_t size()
191 return filter().size();
194 uint32_t crc32()
196 return filter().crc32();
//Fields of one ZIP local file header, plus offsets derived from it.
//
//All fields have defined defaults, so an object that has only been partially filled in (for
//example the central-directory marker result) can be copied safely.
struct zipfile_member_info
{
	bool central_directory_special = false;	//Central directory, not real member.
	uint16_t version_needed = 0;
	uint16_t flags = 0;
	uint16_t compression = 0;
	uint16_t mtime_time = 0;
	uint16_t mtime_day = 0;
	uint32_t crc = 0;
	uint32_t compressed_size = 0;
	uint32_t uncompressed_size = 0;
	std::string filename;
	uint32_t header_offset = 0;		//Offset of the local file header.
	uint32_t data_offset = 0;		//Offset of the member data.
	uint32_t next_offset = 0;		//Offset just past the member data.
};
217 //Parse member starting from current offset.
218 zipfile_member_info parse_member(std::ifstream& file)
220 zipfile_member_info info;
221 info.central_directory_special = false;
222 info.header_offset = file.tellg();
223 //The file header is 30 bytes (this could also hit central header, but that's even larger).
224 unsigned char buffer[30];
225 if(!(file.read(reinterpret_cast<char*>(buffer), 30)))
226 throw std::runtime_error("Can't read file header from ZIP file");
227 uint32_t magic = serialization::u32l(buffer);
228 if(magic == 0x02014b50) {
229 info.central_directory_special = true;
230 return info;
232 if(magic != 0x04034b50)
233 throw std::runtime_error("ZIP archive corrupt: Expected file or central directory magic");
234 info.version_needed = serialization::u16l(buffer + 4);
235 info.flags = serialization::u16l(buffer + 6);
236 info.compression = serialization::u16l(buffer + 8);
237 info.mtime_time = serialization::u16l(buffer + 10);
238 info.mtime_day = serialization::u16l(buffer + 12);
239 info.crc = serialization::u32l(buffer + 14);
240 info.compressed_size = serialization::u32l(buffer + 18);
241 info.uncompressed_size = serialization::u32l(buffer + 22);
242 uint16_t filename_len = serialization::u16l(buffer + 26);
243 uint16_t extra_len = serialization::u16l(buffer + 28);
244 if(!filename_len)
245 throw std::runtime_error("Unsupported ZIP feature: Empty filename not allowed");
246 if(info.version_needed > 20) {
247 throw std::runtime_error("Unsupported ZIP feature: Only ZIP versions up to 2.0 supported");
249 if(info.flags & 0x2001)
250 throw std::runtime_error("Unsupported ZIP feature: Encryption is not supported");
251 if(info.flags & 0x8)
252 throw std::runtime_error("Unsupported ZIP feature: Indeterminate length not supported");
253 if(info.flags & 0x20)
254 throw std::runtime_error("Unsupported ZIP feature: Binary patching is not supported");
255 if(info.compression != 0 && info.compression != 8)
256 throw std::runtime_error("Unsupported ZIP feature: Unsupported compression method");
257 if(info.compression == 0 && info.compressed_size != info.uncompressed_size)
258 throw std::runtime_error("ZIP archive corrupt: csize ≠ usize for stored member");
259 std::vector<unsigned char> filename_storage;
260 filename_storage.resize(filename_len);
261 if(!(file.read(reinterpret_cast<char*>(&filename_storage[0]), filename_len)))
262 throw std::runtime_error("Can't read file name from zip file");
263 info.filename = std::string(reinterpret_cast<char*>(&filename_storage[0]), filename_len);
264 info.data_offset = info.header_offset + 30 + filename_len + extra_len;
265 info.next_offset = info.data_offset + info.compressed_size;
266 return info;
270 bool reader::has_member(const std::string& name) throw()
272 return (offsets.count(name) > 0);
275 std::string reader::find_first() throw(std::bad_alloc)
277 if(offsets.empty())
278 return "";
279 else
280 return offsets.begin()->first;
283 std::string reader::find_next(const std::string& name) throw(std::bad_alloc)
285 auto i = offsets.upper_bound(name);
286 if(i == offsets.end())
287 return "";
288 else
289 return i->first;
292 std::istream& reader::operator[](const std::string& name) throw(std::bad_alloc, std::runtime_error)
294 if(!offsets.count(name))
295 throw std::runtime_error("No such file '" + name + "' in zip archive");
296 zipstream->clear();
297 zipstream->seekg(offsets[name], std::ios::beg);
298 zipfile_member_info info = parse_member(*zipstream);
299 zipstream->clear();
300 zipstream->seekg(info.data_offset, std::ios::beg);
301 if(info.compression == 0) {
302 return *new boost::iostreams::stream<file_input>(*zipstream, info.uncompressed_size, refcnt);
303 } else if(info.compression == 8) {
304 boost::iostreams::filtering_istream* s = new boost::iostreams::filtering_istream();
305 boost::iostreams::zlib_params params;
306 params.noheader = true;
307 s->push(boost::iostreams::zlib_decompressor(params));
308 s->push(file_input(*zipstream, info.compressed_size, refcnt));
309 return *s;
310 } else
311 throw std::runtime_error("Unsupported ZIP feature: Unsupported compression method");
314 reader::iterator reader::begin() throw(std::bad_alloc)
316 return iterator(offsets.begin());
319 reader::iterator reader::end() throw(std::bad_alloc)
321 return iterator(offsets.end());
324 reader::riterator reader::rbegin() throw(std::bad_alloc)
326 return riterator(offsets.rbegin());
329 reader::riterator reader::rend() throw(std::bad_alloc)
331 return riterator(offsets.rend());
334 reader::~reader() throw()
336 if(!--*refcnt) {
337 delete zipstream;
338 delete refcnt;
342 reader::reader(const std::string& zipfile) throw(std::bad_alloc, std::runtime_error)
344 if(!file_is_regular(zipfile))
345 throw std::runtime_error("Zipfile '" + zipfile + "' is not regular file");
346 zipstream = NULL;
347 refcnt = NULL;
348 try {
349 zipfile_member_info info;
350 info.next_offset = 0;
351 zipstream = new std::ifstream;
352 zipstream->open(zipfile.c_str(), std::ios::binary);
353 refcnt = new size_t;
354 *refcnt = 1;
355 if(!*zipstream)
356 throw std::runtime_error("Can't open zipfile '" + zipfile + "' for reading");
357 do {
358 zipstream->clear();
359 zipstream->seekg(info.next_offset);
360 if(zipstream->fail())
361 throw std::runtime_error("Can't seek ZIP file");
362 info = parse_member(*zipstream);
363 if(info.central_directory_special)
364 break;
365 offsets[info.filename] = info.header_offset;
366 } while(1);
367 } catch(...) {
368 delete zipstream;
369 delete refcnt;
370 throw;
374 bool reader::read_linefile(const std::string& member, std::string& out, bool conditional)
375 throw(std::bad_alloc, std::runtime_error)
377 if(conditional && !has_member(member))
378 return false;
379 std::istream& m = (*this)[member];
380 try {
381 std::getline(m, out);
382 istrip_CR(out);
383 delete &m;
384 } catch(...) {
385 delete &m;
386 throw;
390 void reader::read_raw_file(const std::string& member, std::vector<char>& out) throw(std::bad_alloc,
391 std::runtime_error)
393 std::vector<char> _out;
394 std::istream& m = (*this)[member];
395 try {
396 boost::iostreams::back_insert_device<std::vector<char>> rd(_out);
397 boost::iostreams::copy(m, rd);
398 delete &m;
399 } catch(...) {
400 delete &m;
401 throw;
403 out = _out;
406 writer::writer(const std::string& zipfile, unsigned _compression) throw(std::bad_alloc, std::runtime_error)
408 compression = _compression;
409 zipfile_path = zipfile;
410 temp_path = zipfile + ".tmp";
411 zipstream = new std::ofstream(temp_path.c_str(), std::ios::binary);
412 if(!*zipstream)
413 throw std::runtime_error("Can't open zipfile '" + temp_path + "' for writing");
414 committed = false;
415 system_stream = true;
418 writer::writer(std::ostream& stream, unsigned _compression) throw(std::bad_alloc, std::runtime_error)
420 compression = _compression;
421 zipstream = &stream;
422 committed = false;
423 system_stream = false;
426 writer::~writer() throw()
428 if(!committed && system_stream)
429 remove(temp_path.c_str());
430 if(system_stream)
431 delete zipstream;
434 void writer::commit() throw(std::bad_alloc, std::logic_error, std::runtime_error)
436 if(committed)
437 throw std::logic_error("Can't commit twice");
438 if(open_file != "")
439 throw std::logic_error("Can't commit with file open");
440 std::vector<unsigned char> directory_entry;
441 uint32_t cdirsize = 0;
442 uint32_t cdiroff = zipstream->tellp();
443 if(cdiroff == (uint32_t)-1)
444 throw std::runtime_error("Can't read current ZIP stream position");
445 for(auto i : files) {
446 cdirsize += (46 + i.first.length());
447 directory_entry.resize(46 + i.first.length());
448 serialization::u32l(&directory_entry[0], 0x02014b50);
449 serialization::u16l(&directory_entry[4], 3);
450 serialization::u16l(&directory_entry[6], 20);
451 serialization::u16l(&directory_entry[8], 0);
452 serialization::u16l(&directory_entry[10], compression ? 8 : 0);
453 serialization::u16l(&directory_entry[12], 0);
454 serialization::u16l(&directory_entry[14], 10273);
455 serialization::u32l(&directory_entry[16], i.second.crc);
456 serialization::u32l(&directory_entry[20], i.second.compressed_size);
457 serialization::u32l(&directory_entry[24], i.second.uncompressed_size);
458 serialization::u16l(&directory_entry[28], i.first.length());
459 serialization::u16l(&directory_entry[30], 0);
460 serialization::u16l(&directory_entry[32], 0);
461 serialization::u16l(&directory_entry[34], 0);
462 serialization::u16l(&directory_entry[36], 0);
463 serialization::u32l(&directory_entry[38], 0);
464 serialization::u32l(&directory_entry[42], i.second.offset);
465 memcpy(&directory_entry[46], i.first.c_str(), i.first.length());
466 zipstream->write(reinterpret_cast<char*>(&directory_entry[0]), directory_entry.size());
467 if(!*zipstream)
468 throw std::runtime_error("Failed to write central directory entry to output file");
470 directory_entry.resize(22);
471 serialization::u32l(&directory_entry[0], 0x06054b50);
472 serialization::u16l(&directory_entry[4], 0);
473 serialization::u16l(&directory_entry[6], 0);
474 serialization::u16l(&directory_entry[8], files.size());
475 serialization::u16l(&directory_entry[10], files.size());
476 serialization::u32l(&directory_entry[12], cdirsize);
477 serialization::u32l(&directory_entry[16], cdiroff);
478 serialization::u16l(&directory_entry[20], 0);
479 zipstream->write(reinterpret_cast<char*>(&directory_entry[0]), directory_entry.size());
480 if(!*zipstream)
481 throw std::runtime_error("Failed to write central directory end marker to output file");
482 if(system_stream) {
483 dynamic_cast<std::ofstream*>(zipstream)->close();
484 std::string backup = zipfile_path + ".backup";
485 zip::rename_overwrite(zipfile_path.c_str(), backup.c_str());
486 if(zip::rename_overwrite(temp_path.c_str(), zipfile_path.c_str()) < 0)
487 throw std::runtime_error("Can't rename '" + temp_path + "' -> '" + zipfile_path + "'");
489 committed = true;
492 std::ostream& writer::create_file(const std::string& name) throw(std::bad_alloc, std::logic_error,
493 std::runtime_error)
495 if(open_file != "")
496 throw std::logic_error("Can't open file with file open");
497 if(name == "")
498 throw std::runtime_error("Bad member name");
499 current_compressed_file.resize(0);
500 s = new boost::iostreams::filtering_ostream();
501 s->push(size_and_crc_filter(4096));
502 if(compression) {
503 boost::iostreams::zlib_params params;
504 params.noheader = true;
505 s->push(boost::iostreams::zlib_compressor(params));
507 s->push(vector_output(current_compressed_file));
508 open_file = name;
509 return *s;
512 void writer::close_file() throw(std::bad_alloc, std::logic_error, std::runtime_error)
514 if(open_file == "")
515 throw std::logic_error("Can't close file with no file open");
516 uint32_t ucs, cs, crc32;
517 boost::iostreams::close(*s);
518 size_and_crc_filter& f = *s->component<size_and_crc_filter>(0);
519 cs = current_compressed_file.size();
520 ucs = f.size();
521 crc32 = f.crc32();
522 delete s;
524 base_offset = zipstream->tellp();
525 if(base_offset == (uint32_t)-1)
526 throw std::runtime_error("Can't read current ZIP stream position");
527 unsigned char header[30];
528 memset(header, 0, 30);
529 serialization::u32l(header, 0x04034b50);
530 header[4] = 20;
531 header[6] = 0;
532 header[8] = compression ? 8 : 0;
533 header[12] = 33;
534 header[13] = 40;
535 serialization::u32l(header + 14, crc32);
536 serialization::u32l(header + 18, cs);
537 serialization::u32l(header + 22, ucs);
538 serialization::u16l(header + 26, open_file.length());
539 zipstream->write(reinterpret_cast<char*>(header), 30);
540 zipstream->write(open_file.c_str(), open_file.length());
541 zipstream->write(&current_compressed_file[0], current_compressed_file.size());
542 if(!*zipstream)
543 throw std::runtime_error("Can't write member to ZIP file");
544 current_compressed_file.resize(0);
545 file_info info;
546 info.crc = crc32;
547 info.uncompressed_size = ucs;
548 info.compressed_size = cs;
549 info.offset = base_offset;
550 files[open_file] = info;
551 open_file = "";
554 void writer::write_linefile(const std::string& member, const std::string& value, bool conditional)
555 throw(std::bad_alloc, std::runtime_error)
557 if(conditional && value == "")
558 return;
559 std::ostream& m = create_file(member);
560 try {
561 m << value << std::endl;
562 close_file();
563 } catch(...) {
564 close_file();
565 throw;
569 void writer::write_raw_file(const std::string& member, const std::vector<char>& content) throw(std::bad_alloc,
570 std::runtime_error)
572 std::ostream& m = create_file(member);
573 try {
574 m.write(&content[0], content.size());
575 if(!m)
576 throw std::runtime_error("Can't write ZIP file member");
577 close_file();
578 } catch(...) {
579 close_file();
580 throw;
//File-local helpers for resolving a path relative to the file that references it.
namespace
{
#if defined(_WIN32) || defined(_WIN64) || defined(TEST_WIN32_CODE)
	const char* path_splitters = "\\/";
	bool drives_allowed = true;
#else
	//Assume Unix(-like) system.
	const char* path_splitters = "/";
	bool drives_allowed = false;
#endif

	//Return pointer to the first occurrence of ch in str, or NULL if there is none. Unlike
	//strchr(), the terminating NUL is never matched.
	const char* str_index(const char* str, int ch)
	{
		for(size_t i = 0; str[i]; i++)
			if(str[i] == ch)
				return str + i;
		return NULL;
	}

	//Is ch one of the path separators of this platform?
	bool ispathsep(char ch)
	{
		return (str_index(path_splitters, static_cast<int>(static_cast<unsigned char>(ch))) != NULL);
	}

	//Is ch an ASCII letter usable as a drive letter (either case)?
	bool isdriveletter(char ch)
	{
		return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
	}

	//Is path a root: a lone separator, or (where drives are allowed) "X:<sep>" or a UNC
	//"<sep><sep>name<sep>" prefix?
	bool isroot(const std::string& path)
	{
		if(path.length() == 1 && ispathsep(path[0]))
			return true;
		if(!drives_allowed)
			//NO more cases for this.
			return false;
		//Drive root. Both upper and lower case drive letters count.
		if(path.length() == 3 && isdriveletter(path[0]) && path[1] == ':' && ispathsep(path[2]))
			return true;
		//UNC.
		if(path.length() <= 3 || !ispathsep(path[0]) || !ispathsep(path[1]) ||
			!ispathsep(path[path.length() - 1]))
			return false;
		//Root only if the trailing separator is the first one after the leading pair.
		return (path.find_first_of(path_splitters, 2) == path.length() - 1);
	}

	//Apply one path component to path and return the result.
	//"" and "." keep path, ".." strips the last component (throwing std::runtime_error if path
	//is empty or a root), anything else is appended with a separator if needed.
	std::string walk(const std::string& path, const std::string& component)
	{
		if(component == "" || component == ".")
			//Current directory.
			return path;
		else if(component == "..") {
			//Parent directory.
			if(path == "" || isroot(path))
				throw std::runtime_error("Can't rise to containing directory");
			size_t split = path.find_last_of(path_splitters);
			if(split < path.length())
				return path.substr(0, split);
			else
				return "";
		} else if(path == "" || ispathsep(path[path.length() - 1]))
			return path + component;
		else
			return path + "/" + component;
	}

	//Resolve _name relative to the directory part of _referencing_path.
	//If _referencing_path has no separator, or _name is absolute, _name is returned as is.
	std::string combine_path(const std::string& _name, const std::string& _referencing_path)
	{
		std::string name = _name;
		std::string referencing_path = _referencing_path;
		//Keep only the directory part of the referencing path.
		size_t x = referencing_path.find_last_of(path_splitters);
		if(x < referencing_path.length())
			referencing_path = referencing_path.substr(0, x);
		else
			return name;
		//Check if name is absolute.
		if(ispathsep(name[0]))
			return name;
		if(drives_allowed && name.length() >= 3 && isdriveletter(name[0]) && name[1] == ':' &&
			ispathsep(name[2]))
			return name;
		//It is not absolute. Walk it one component at a time.
		std::string path = referencing_path;
		size_t pindex = 0;
		while(true) {
			size_t split = name.find_first_of(path_splitters, pindex);
			std::string c;
			if(split < name.length())
				c = name.substr(pindex, split - pindex);
			else
				c = name.substr(pindex);
			path = walk(path, c);
			if(split < name.length())
				pindex = split + 1;
			else
				break;
		}
		//If path becomes empty, assume it means current directory.
		if(path == "")
			path = ".";
		return path;
	}
}
684 std::string resolverel(const std::string& name, const std::string& referencing_path) throw(std::bad_alloc,
685 std::runtime_error)
687 return combine_path(name, referencing_path);
690 std::istream& openrel(const std::string& name, const std::string& referencing_path) throw(std::bad_alloc,
691 std::runtime_error)
693 std::string path_to_open = combine_path(name, referencing_path);
694 std::string final_path = path_to_open;
695 //Try to open this from the main OS filesystem.
696 if(file_is_regular(path_to_open)) {
697 std::ifstream* i = new std::ifstream(path_to_open.c_str(), std::ios::binary);
698 if(i->is_open()) {
699 return *i;
701 delete i;
703 //Didn't succeed. Try to open as ZIP archive.
704 std::string membername;
705 while(true) {
706 size_t split = path_to_open.find_last_of("/");
707 if(split >= path_to_open.length())
708 throw std::runtime_error("Can't open '" + final_path + "'");
709 //Move a component to member name.
710 if(membername != "")
711 membername = path_to_open.substr(split + 1) + "/" + membername;
712 else
713 membername = path_to_open.substr(split + 1);
714 path_to_open = path_to_open.substr(0, split);
715 if(file_is_regular(path_to_open))
716 try {
717 reader r(path_to_open);
718 return r[membername];
719 } catch(std::bad_alloc& e) {
720 throw;
721 } catch(std::runtime_error& e) {
726 std::vector<char> readrel(const std::string& name, const std::string& referencing_path) throw(std::bad_alloc,
727 std::runtime_error)
729 std::vector<char> out;
730 std::istream& s = openrel(name, referencing_path);
731 boost::iostreams::back_insert_device<std::vector<char>> rd(out);
732 boost::iostreams::copy(s, rd);
733 delete &s;
734 return out;
737 bool file_exists(const std::string& name) throw(std::bad_alloc)
739 std::string path_to_open = name;
740 std::string final_path = path_to_open;
741 if(file_is_regular(path_to_open))
742 return true;
743 //Didn't succeed. Try to open as ZIP archive.
744 std::string membername;
745 while(true) {
746 size_t split = path_to_open.find_last_of("/");
747 if(split >= path_to_open.length())
748 return false;
749 //Move a component to member name.
750 if(membername != "")
751 membername = path_to_open.substr(split + 1) + "/" + membername;
752 else
753 membername = path_to_open.substr(split + 1);
754 path_to_open = path_to_open.substr(0, split);
755 if(file_is_regular(path_to_open))
756 try {
757 reader r(path_to_open);
758 return r.has_member(membername);
759 } catch(std::bad_alloc& e) {
760 throw;
761 } catch(std::runtime_error& e) {
764 return false;