5 require 'rexml/document'
8 initial_url = URI.parse ARGV[0]
9 limit_url = URI.parse ARGV[1]
15 current_url = urls.shift
16 current_url.route_from(limit_url)
19 page = REXML::Document.new(Net::HTTP.get(current_url))
21 rescue REXML::ParseException
22 puts "erreur XML dans " + current_url
27 page.elements.each('//a[@href]') do |anchor|
28 url = URI.parse(anchor.attributes['href'])
31 url = current_url + url
44 end while not urls.empty?