1 # Library for accessing a Wikipedia zdump file.
3 # By Stian Haklev (shaklev@gmail.com), 2007
4 # Released under MIT and GPL licenses
8 # archive = ZArchive.new('eo.zdump')
9 # puts archive.get_article('eo/o/s/l/Oslo.html')
15 self.slice!(-number..-1)
20 def readloc(size, offset)
28 @zdump = File.open(file, 'r')
29 @zdump_loc = @zdump.read(4).unpack('V')[0]
33 loc = get_location(url)
34 return loc ? get_text(*loc) : nil
38 def get_text(block_offset, block_size, offset, size)
39 return ZCompress.uncompress( @zdump.readloc( block_size, block_offset ))[offset, size]
43 md5 = Digest::MD5.hexdigest(url)
44 firstfour = sprintf("%d", ("0x" + md5[0..3]) ).to_i
45 loc = (firstfour * 8) + @zdump_loc
47 start, size = @zdump.readloc(8, loc).unpack('V2')
48 idx = @zdump.readloc(size, start)
49 hex, *coordinates = idx.pop(32).unpack('H32V4') until ( hex == md5 || idx.empty? )
50 return coordinates if hex == md5