1 # Library for accessing a Wikipedia zdump file.
3 # By Stian Haklev (shaklev@gmail.com), 2007
4 # Released under MIT and GPL licenses
8 # archive = ZArchive.new('eo.zdump')
9 # puts archive.get_article('eo/o/s/l/Oslo.html')
16 self.slice!(-number..-1)
21 def readloc(size, offset)
29 @zdump = File.open(file, 'r')
30 @zdump_loc = @zdump.read(4).unpack('V')[0]
34 loc = get_location(url)
35 return loc ? get_text(*loc) : nil
39 def get_text(block_offset, block_size, offset, size)
40 return ZCompress.uncompress( @zdump.readloc( block_size, block_offset ))[offset, size]
44 md5 = Digest::MD5.hexdigest(url)
45 firstfour = sprintf("%d", ("0x" + md5[0..3]) ).to_i
46 loc = (firstfour * 8) + @zdump_loc
47 start, size = @zdump.readloc(8, loc).unpack('V2')
48 idx = @zdump.readloc(size, start)
49 hex, *coordinates = idx.pop(32).unpack('H32V4') until ( hex == md5 || idx.empty? )
50 return coordinates if hex == md5