2 # Program that replaces commonly used HTML fragments to shrink the size of a page
4 require 'htmlshrinker-data'
# Sets up the shrinker state from a page template.
#
# template - String that is split on 20.chr into @before and @after (the same
#            separator extract_template joins its two parts with).
# archive  - referenced only by the commented-out asset loader below
#            (archive.get_article).
# basedir  - referenced only by the commented-out asset loader below
#            (File.join(basedir, f)).
#
# NOTE(review): `jscss` is read below but is not assigned anywhere in the
# visible lines, and no closing `end` is visible for this method; presumably
# those lines are missing from this listing - confirm against the full file.
7 def initialize(template, archive, basedir)
# Asset paths: the first list holds JavaScript files, the second CSS files.
8 file = [%w(skins/common/wikibits.js skins/htmldump/md5.js skins/htmldump/utf8.js skins/htmldump/lookup.js raw/gen.js) , %w(raw/MediaWiki~Common.css raw/MediaWiki~Monobook.css raw/gen.css skins/htmldump/main.css skins/monobook/main.css)]
# NOTE(review): pretext/posttext list the style wrapper first and the script
# wrapper second, the opposite order to `file` (JS first, CSS second), while the
# commented-out loader indexes all three with the same `no`. posttext also
# holds bare tag names rather than closing tags - verify before re-enabling.
10 pretext = ['<style type="text/css">', '<script type="text/javascript">']
11 posttext = ['style', 'script']
14 # file[no].each do |f|
15 # txt = archive.get_article(File.join(basedir, f))
16 # puts File.join(basedir,f), txt.size
17 # jscss[no] << pretext[no] << txt << posttext[no] unless txt.nil?
# First element of jscss becomes the JavaScript text, second the CSS text.
20 @jstext, @csstext = *jscss
# Removes everything from "var ScriptSuffix" to the end of that line.
21 @jstext.gsub!(/var ScriptSuffix(.*?)$/,'') # includes <script> tag - messes up
# Strips /* ... */ blocks (across lines) and then // ... to end of line.
# NOTE(review): the // pattern matches any "//" on a line, so it would also cut
# "//" occurring inside string literals such as URLs - confirm this is acceptable.
22 @jstext = @jstext.gsub(/\/\*(.*?)\*\//m, '').gsub(/\/\/(.*?)$/, '') # rm comments
# Strips /* ... */ comment blocks from the CSS text.
23 @csstext.gsub!(/\/\*(.*?)\*\//m, '')
24 @csstext.gsub!('@import "../monobook/main.css";', '') # we already included this
# Template halves: text before / after the character with code 20.
25 @before, @after = template.split(20.chr)
26 # @before = @before.gsub("raw", "/raw").gsub("./", "/")
27 # @before.gsub!(HTMLShrinker_data::To_be_replaced, @jstext + @csstext)
# NOTE(review): the `def` line and closing `end` for these statements are not
# present in this listing; `text` is presumably the method's parameter - confirm.
#
# Rebuilds a page: the first line of `text` is taken as the title, the
# remainder as the body, which is then wrapped in the template halves.
# NOTE(review): if `text` contains no "\n", split yields a single element, so
# `text` becomes nil and the gsub! below raises NoMethodError.
31 title, text = text.split("\n", 2)
# Applies each Replacements pair in the y -> x direction, the reverse of the
# x -> y substitution performed by the statements that build the title/text form.
32 HTMLShrinker_data::Replacements.each {|x, y| text.gsub!(y, x)}
# NOTE(review): `title` is only used by the commented-out line below.
33 #.gsub(/TITLE/, title).gsub("POINTER", @csstext + @jstext)
34 result = @before + text + @after
35 return strip_whitespace(result)
# NOTE(review): the `def` line and closing `end` for these statements are not
# present in this listing; `text` is presumably the method's parameter - confirm.
#
# Reduces a full HTML page to "title\nbody".
# Title is the content of the "firstHeading" element up to </h1>, or "Unnamed".
41 title = (text.match(/"firstHeading">(.*?)\<\/h1>/m) ? Regexp::last_match[1] : "Unnamed")
# Keeps only what lies between the start/end content markers when both are
# found; otherwise `text` is left as the whole input.
42 text = Regexp::last_match[1] if text.match(/ start content -->(.*?)\<\!-- end content /m)
# Applies each Replacements pair in the x -> y direction.
# NOTE(review): gsub! mutates in place; when the markers above did not match,
# `text` is still the object that was passed in, so that string is modified.
43 HTMLShrinker_data::Replacements.each {|x, y| text.gsub!(x, y) }
# NOTE(review): strip_whitespace builds and returns a new string (non-bang
# gsub) and the result is discarded here, so this call has no effect on `text`.
44 strip_whitespace(text)
# Removes <img src=...> tags, each matched up to the next ">".
45 text.gsub!(/<img src=(.*?)>/, "")
# NOTE(review): the title pattern uses /m, so a captured title may contain a
# newline; the reverse step splits on the first "\n" and would then cut the
# title short - confirm titles are always single-line.
46 return [title, text].join("\n")
# Builds a reusable page template from an example HTML page.
#
# Keeps everything before the first "<!-- start content -->" marker and
# everything after the first "<!-- end content -->" marker, joined by the
# character with code 20 (20.chr). The result can be stored and later handed
# to HTMLShrinker at initialization, which splits it on the same separator.
#
# text - String holding the example HTML page.
#
# Returns a String of the form "<top part><20.chr><bottom part>"; a part whose
# marker does not occur in text is left empty.
def extract_template(text)
  top, start_marker, _rest = text.partition('<!-- start content -->')
  # When the marker is absent, partition hands back the whole text as the
  # first element; the template top must be empty in that case.
  top = '' if start_marker.empty?
  # partition yields an empty last element when the end marker is absent.
  bottom = text.partition('<!-- end content -->').last
  [top, bottom].join(20.chr)
end
# Shrinks the whitespace in a chunk of HTML.
#
# Tabs become spaces, newlines are removed outright (nothing is put in their
# place, so text separated only by a newline ends up adjacent), and every
# resulting run of spaces is collapsed to a single space. The previous
# space-for-space substitution changed nothing, so runs were never collapsed.
#
# txt - String to normalize; it is not modified.
#
# Returns a new String.
def strip_whitespace(txt)
  # Squeeze last so that spaces left next to each other by the tab and
  # newline steps are collapsed as well.
  txt.tr("\t", ' ').delete("\n").squeeze(' ')
end