mongrel-web.rb

   1 #!/usr/bin/ruby
   2 # Web server for viewing zdump files.
   3 # By Stian Haklev (shaklev@gmail.com), 2007
   4 # Released under MIT and GPL licenses
   5 #
   6 # Usage:
   7 # ruby mongrel-web.rb <zdumpfile> [--sizes] [--prefix <path-prefix>]
   8
   9 %w(webrick zarchive htmlshrinker rubygems trollop).each {|x| require x}
  10 include WEBrick
  11 Archive = ZArchive::Reader.new(ARGV[0])
  12 template = Archive.get('__Zdump_Template__')
  13 Htmlshrink = HTMLExpander.new(template, Archive)
  14 Cache = {}
  15
  16 # do commandline parsing
  17 opts = Trollop::options do
  18   version "mongrel-web 0.1 (c) 2007 Stian Haklev (MIT/GPL)"
  19   banner <<-EOS
  20 mongrel-web.rb is part of the zip-doc suite. It serves the contents of a .zdump file dynamically to localhost, allowing you to browse a wikipedia offline.
  21
  22 Usage:
  23        ruby mongrel-web.rb [options] <filename.zdump>
  24        (for example ruby mongrel-web.rb ../Downloads/id.zdump)
  25 where [options] are:
  26 EOS
  27
  28   opt :sizes, "Insert sizes after each link, and change font-size based on size of linked-to article (very slow)"
  29   opt :prefix, "Insert a given prefix before any url - should not be necessary with standard zdump files", :type => :string, :default => ''
  30 end
  31
  32 Base = opts[:prefix]
  33
  34 class NilLog
  35   def <<; end
  36   def info(*args); end
  37   def error(*args); end
  38   def warn(*args); end
  39   def debug(*args); end
  40   def debug?(*args); false; end
  41 end
  42
  43 def do_sizes(file, arc)
  44   content = file.match(/\<div id="contentSub"\>(.*?)<div class="printfooter">/m)[1]
  45   ary = []
  46   content.scan(/a href="\.\.\/\.\.\/\.\.\/(.*?)"(.*?)>(.*?)<\/a>/) do |match|
  47     ary << match
  48   end
  49   ary.each do |match|
  50     size = arc.get_size(ZUtil::url_unescape(match[0])) / 1000
  51       fsize = '-1' if size < 15
  52       fsize = '+1' if size > 50
  53     content.sub!("<a href=\"../../../#{match[0]}\"#{match[1]}>#{match[2]}</a>", "<a href='/#{match[0]}' #{match[1]}><font size=#{fsize}>#{match[2]} (#{size}k)</font></a>")
  54   end
  55   content = '<div id="contentSub">' + content + "<div class='printfooter'>"
  56   file.gsub!(/\<div id="contentSub"\>(.*?)<div class="printfooter">/m, content)
  57 end
  58
  59 wiki_proc = lambda do |req, resp|
  60   resp['Content-Type'] = "text/html"
  61   url = ZUtil::url_unescape(req.unparsed_uri[1..-1])
  62   t = Time.now
  63   url = "index.html" if url.empty?
  64   from_cache = ''
  65
  66   # if style/js
  67   if url =~ /(raw|skins|images)\/(.*?)$/
  68     url = Base + Regexp::last_match[0]
  69     if Cache[url]
  70       text = Cache[url]
  71       from_cache = 'from cache '
  72     else
  73       text = Archive.get(url)
  74       return if text.nil?
  75       line1, line2 = text.split("\n",2)
  76       text = line2 if line1 == 'Unnamed'
  77       Cache[url] = text
  78     end
  79     resp.body = text
  80   else
  81     txt = Archive.get(url)
  82     txt ||= "Not found\n\nSorry, article #{url} not found"
  83     txt = Htmlshrink.uncompress(txt)
  84     txt = do_sizes(txt, Archive) if opts[:sizes]
  85     resp.body = ( txt )
  86
  87   end
  88   resp["Content-Type"] = case url
  89   when /\.js$/: "text/javascript"
  90   when /\.css$/: "text/css"
  91   when /\.html$/: "text/html"
  92   end
  93   puts "Got #{url} #{from_cache}in #{"%2.3f" % (Time.now - t)} seconds."
  94 end
  95
  96 wiki = HTTPServlet::ProcHandler.new(wiki_proc)
  97 SERVER = HTTPServer.new(:Port => 2042, :DocumentRoot => Dir.pwd, :Logger => NilLog.new )
  98
  99 SERVER.mount("/", wiki)
 100 trap("INT"){ SERVER.shutdown;  exit(0) }
 101 puts "Server started, website accessible at http://localhost:2042."
 102 SERVER.start