Adding option of bzip2 or zlib, marked in the file. Similarly, option of size of...
[zip-doc.git] / zdump.rb
blobea677662a9d73d01bcb9609ec520b7a6c7d5692a
#!/usr/bin/ruby
# Program that packs a directory tree into a zdump file.
# By Stian Haklev (shaklev@gmail.com), 2007
# Released under MIT and GPL licenses
#
# Usage: ruby zdump.rb <directory> <output file> [ignore regexp]
#
#   <directory>      root of the tree to pack; must contain an index.html,
#                    which is used to extract the shared page template
#   <output file>    path handed to ZArchive::Writer
#   [ignore regexp]  optional; paths matching it are left out of the archive

%w(sha1 zarchive find htmlshrinker zutil cgi).each {|x| require x}
include ZUtil

STDOUT.sync = true

# Both the source directory and the output file are required. Running with
# no arguments at all is treated as a request for help (exit status 0); a
# partial argument list is an error (exit status 1).
if ARGV.size < 2
  puts "Usage: ruby zdump.rb <directory> <output file> [ignore regexp]"
  exit(ARGV.empty? ? 0 : 1)
end

shrinker = HTMLShrinker.new
name = ARGV[1]

t = Time.now
# File.join guarantees exactly one trailing separator, so `base.size` is the
# length of the prefix to strip from every path yielded by Find.
base = File.join(ARGV[0], "/")
puts "Indexing files in #{base} and writing the file #{name}"
to_strip = base.size
archive = ZArchive::Writer.new(name)

# The default pattern presumably targets namespace prefixes of an
# Indonesian-language wiki dump -- TODO confirm with the data set in use.
ignore = ARGV[2] ? Regexp.new(ARGV[2]) : /(Berkas~|Pembicaraan|Templat|Pengguna)/

# The template extracted from index.html is stored once under a reserved name.
template = shrinker.extract_template(File.read(base + "index.html"))
archive.add("__Zdump_Template__", template)

# First pass: collect the files to pack. `all_counter` counts every entry
# Find yields (directories included, and `base` itself), so it is at least 1
# whenever `base` exists.
all_counter = 0
puts "Reading filelist."
filelist = []
Find.find(base) do |newfile|
  all_counter += 1
  next if File.directory?(newfile) || !File.readable?(newfile)
  next if newfile =~ ignore
  filelist << newfile
end
no_of_files = filelist.size

puts "Filelist read, selected #{no_of_files} out of #{all_counter}, making up #{npp(100 * no_of_files.to_f / all_counter.to_f)}%."
puts "Beginning to compress."

# Second pass: shrink each file and add it to the archive.
t2 = Time.now
filelist.each_with_index do |newfile, counter|
  # Progress report every 1000 files.
  if (counter % 1000).zero?
    # Nothing has been processed at counter 0, so rate and percentage would
    # be divisions by zero; only report once there is something to measure.
    if counter > 0
      page_per_sec = counter.to_f / (Time.now - t2).to_f
      puts "\n#{counter} pages indexed in #{npp(Time.now - t)} seconds, average #{npp(page_per_sec)} files per second. #{archive.hardlinks.size} redirects, #{npp(archive.hardlinks.size.to_f * 100 / counter.to_f)} percentage of all pages."
      # The estimate covers the files still to be processed, not the total.
      puts "Estimated time left: #{npp(((no_of_files - counter).to_f / page_per_sec) / 60)} minutes."
    end
    STDOUT.print "Writing block: "
  end

  text = shrinker.compress(File.read(newfile))
  # Shrunk text starting with "#R " is recorded as a hardlink (reported as a
  # "redirect" above) to the target named after the marker.
  if text[0..2] == "#R "
    # NOTE(review): the hardlink is registered under the full path, while
    # regular entries are stored with `base` stripped -- confirm against
    # ZArchive::Writer#add_hardlink whether that is intended.
    archive.add_hardlink(newfile, text[3..-1])
  else
    archive.add(newfile[to_strip..-1], text)
  end
end
filelist = nil # memory cleanup

puts "\n\nFinished, flushing index/processing redirects. #{npp(Time.now - t)}"
archive.flush # to make sure all blocks have been written