htmlshrinker.rb

   1 #!/usr/bin/ruby
   2 # program to replace commonly used <HTML> to shrink size of page
   3
   4 require 'htmlshrinker-data'
   5
   6 class HTMLExpander
   7   def initialize(template, archive, basedir)
   8     file = [%w(skins/common/wikibits.js skins/htmldump/md5.js skins/htmldump/utf8.js skins/htmldump/lookup.js raw/gen.js) , %w(raw/MediaWiki~Common.css raw/MediaWiki~Monobook.css raw/gen.css skins/htmldump/main.css skins/monobook/main.css)]
   9     jscss = ['', '']
  10     pretext = ['<style type="text/css">', '<script type="text/javascript">']
  11     posttext = ['style', 'script']
  12
  13     # (0..1).each do |no|
  14     #   file[no].each do |f|
  15     #     txt = archive.get_article(File.join(basedir, f))
  16     #     puts File.join(basedir,f), txt.size
  17     #     jscss[no] << pretext[no] << txt << posttext[no] unless txt.nil?
  18     #   end
  19     # end
  20     @jstext, @csstext = *jscss
  21     @jstext.gsub!(/var ScriptSuffix(.*?)$/,'')   # includes <script> tag - messes up
  22     @jstext = @jstext.gsub(/\/\*(.*?)\*\//m, '').gsub(/\/\/(.*?)$/, '') # rm comments
  23     @csstext.gsub!(/\/\*(.*?)\*\//m, '')
  24     @csstext.gsub!('@import "../monobook/main.css";', '') # we already included this
  25     @before, @after = template.split(20.chr)
  26 #    @before = @before.gsub("raw", "/raw").gsub("./", "/")
  27 #    @before.gsub!(HTMLShrinker_data::To_be_replaced, @jstext + @csstext)
  28   end
  29
  30   def uncompress(text)
  31     title, text = text.split("\n", 2)
  32     HTMLShrinker_data::Replacements.each {|x, y| text.gsub!(y, x)}
  33     #.gsub(/TITLE/, title).gsub("POINTER", @csstext + @jstext)
  34     result = @before + text + @after
  35     return strip_whitespace(result)
  36   end
  37 end
  38
  39 class HTMLShrinker
  40   def compress(text)
  41     title = (text.match(/"firstHeading">(.*?)\<\/h1>/m) ? Regexp::last_match[1] : "Unnamed")
  42     text = Regexp::last_match[1] if text.match(/ start content -->(.*?)\<\!-- end content /m)
  43     HTMLShrinker_data::Replacements.each {|x, y| text.gsub!(x, y) }
  44     strip_whitespace(text)
  45     text.gsub!(/<img src=(.*?)>/, "")
  46     return [title, text].join("\n")
  47   end
  48
  49   # takes an example html file, extracts the top and bottom, does some replacements
  50   # - this can later be stored and handed to HTMLShrinker at initialization
  51   def extract_template(text)
  52     before = Regexp::last_match.pre_match if text.match(/<\!-- start content -->/)
  53     after = Regexp::last_match.post_match if text.match(/<\!-- end content -->/)
  54     return [before, after].join(20.chr)
  55   end
  56 end
  57
  58 def strip_whitespace(txt)
  59   return txt.gsub(/\t/, " ").gsub('  ',' ').gsub("\n", '')
  60 end