new test for cubito
[donde.git] / dondebatch / guiaoleo.rb
blob1380e9ea631a5b6b5240b2b04cdf757aba9a0ff7
1 require 'net/http'
2 require 'uri'
3 require 'iconv'
5 require 'util'
# Scraper for the restaurant listing pages of guiaoleo.com.ar.
#
# Each call to #fetch_page downloads one page of the listing, extracts the
# restaurants found on it and stores the highest page number linked from that
# page in #pages.
class Guiaoleo
  # Listing endpoint; the query string is produced by Util.build_uri_params.
  LIST_URL = 'http://www.guiaoleo.com.ar/list.php'.freeze
  # Prefix of a restaurant's detail URL; the numeric ID is appended to it.
  DETAIL_URL = 'http://www.guiaoleo.com.ar/detail.php?ID='.freeze
  # For now, hardcode search to Capital only, since addresses returned in list don't include city
  ZONES = '10,15,31,5,30,19,2,22,32,13,18,7,20,3,23,55,6,1,24,25,4,8,17,12,16,9'.freeze
  # Appended to every parsed address, because the listing omits the city.
  ADDRESS_SUFFIX = ', capital federal, argentina'.freeze

  # Highest page number linked from the most recently fetched page
  # (0 when that page contained no page links, nil before the first fetch).
  attr_reader :pages

  # Downloads one listing page and parses the restaurants on it.
  #
  # Side effects: prints progress / parse-failure messages to stdout and
  # updates #pages.
  #
  # @param page_num [Integer, String] page number sent as the 'Page' parameter
  # @return [Array<Hash>] one hash per restaurant with the keys 'url', 'name'
  #   and, when it could be parsed, 'address'
  def fetch_page(page_num)
    puts "Fetching page #{page_num}"
    query = Util.build_uri_params({
      'Page' => page_num,
      'navby' => 'multiply',
      'zona' => ZONES
    })
    html = to_utf8(Net::HTTP.get(URI.parse(LIST_URL + query)))

    @pages = parse_page_count(html)
    parse_restaurants(html)
  end

  private

  # Converts the response body from ISO-8859-1 to UTF-8.
  # Uses String#encode where it exists; Iconv (no longer part of the standard
  # library since Ruby 2.0) is only touched on interpreters without it.
  def to_utf8(raw)
    if raw.respond_to?(:encode)
      # dup: never re-tag the caller's string in place (it may be frozen).
      raw.dup.force_encoding('ISO-8859-1').encode('UTF-8')
    else
      Iconv.new('UTF-8', 'ISO-8859-1').iconv(raw)
    end
  end

  # Parse the list of restaurants. Currently parsed elements:
  # - name, address, detail URL
  # Rows that cannot be identified (no ID) are dropped.
  def parse_restaurants(html)
    html.scan(%r{detail\.php.*?/tr}).map { |row| parse_row(row) }.compact
  end

  # Builds the restaurant hash for one table-row fragment, or returns nil
  # (after logging) when the row carries no restaurant ID.
  def parse_row(row)
    id = row[/ID=(\d+)/, 1]
    if id.nil?
      puts "Failed parsing. No restaurant ID in ROW = [#{row}]"
      return nil
    end

    restaurant = { 'url' => DETAIL_URL + id }
    # First text node after the link's opening tag; nil if there is none.
    restaurant['name'] = row[/>(.*?)</, 1]

    # The address is read from the third <td> cell of the fragment. Each match
    # is taken from its own return value rather than from $~, so a missing
    # cell can never pick up the capture of an earlier match.
    col = row.scan(%r{<td>.*?</td>})[2]
    street = col && col[/2>(.+?)</, 1]
    if street
      restaurant['address'] = street + ADDRESS_SUFFIX
    else
      puts "Failed parsing. COL = [#{col}] from ROW = [#{row}]"
    end
    restaurant
  end

  # Parse the total number of pages: the largest N among all "Page=N"
  # occurrences, or 0 when there are none.
  def parse_page_count(html)
    html.scan(/Page=(\d+)/).map { |(num)| num.to_i }.max || 0
  end
end