From 2b8879108ab08664c67b7391f06dc65e9172790d Mon Sep 17 00:00:00 2001
From: John
Date: Sat, 21 Apr 2012 19:36:22 +0300
Subject: [PATCH] redrafted fetcher with watir-webdriver

---

Notes:
    Replaces the Mechanize-based fetch.rb (25 lines, removed) with w.rb
    (43 lines, added), which drives a browser through watir-webdriver.

    What w.rb does, in order:
      * starts a browser on the bookhub reader URL;
      * remove_tooltip waits for div#ui-tooltip-0 to be present, then
        clicks its a.ui-tooltip-close link, logging both steps;
      * clicks the link inside the ul#toc list item at index 5;
      * waits while div#content-loader (inside div#book-main) is present;
      * writes the html of each element matched by './div/*' under
        div#book-content to a file named 'dump';
      * sleeps 5 seconds and closes the browser.

    NOTE(review): browser.close is the last statement and is not wrapped
    in begin/ensure, so it only runs when every earlier step succeeds.

    NOTE(review): this copy of the patch was rebuilt from a version whose
    line breaks and indentation had been collapsed. Two-space indentation
    is assumed for the Ruby bodies (including the 'p l' line of the
    removed fetch.rb) -- confirm against blobs 31285a6 and d1af456
    before applying.

 fetch.rb | 25 -------------------------
 w.rb     | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 25 deletions(-)
 delete mode 100755 fetch.rb
 create mode 100755 w.rb

diff --git a/fetch.rb b/fetch.rb
deleted file mode 100755
index 31285a6..0000000
--- a/fetch.rb
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/ruby
-
-require 'mechanize'
-require 'logger'
-
-agent = Mechanize.new
-agent.log = Logger.new "mech.log"
-agent.user_agent_alias = 'Mac Safari'
-
-page = agent.get 'http://catalog.flatworldknowledge.com/bookhub/reader/21'
-
-#page.search('ul#toc > li')[3..-1].each { |li|
-
-anchor = page.search('ul#toc > li')[5].xpath('a')
-
-Link.new.click
-
-%Q{
-.each { |l|
-  p l
-}
-}
-
-
-
diff --git a/w.rb b/w.rb
new file mode 100755
index 0000000..d1af456
--- /dev/null
+++ b/w.rb
@@ -0,0 +1,43 @@
+#!/usr/bin/ruby
+
+require 'watir-webdriver'
+
+def remove_tooltip browser
+  tooltip = browser.div(:id => 'ui-tooltip-0')
+  tooltip.wait_until_present
+  puts 'tooltip appeared'
+
+  tooltip.a(:class => 'ui-tooltip-close').click
+  puts 'tooltip clicked'
+end
+
+browser = Watir::Browser.start 'http://catalog.flatworldknowledge.com/bookhub/reader/21'
+
+remove_tooltip browser
+
+lis = browser.ul(:id => 'toc').lis(:xpath => './*')
+
+loader = browser.div(:id => 'book-main').div(:id => 'content-loader')
+wrapper = browser.div(:id => 'book-main').div(:id => 'book-content')
+
+
+lis[5].a(:xpath => './*').click
+
+loader.wait_while_present
+
+content = wrapper.elements(:xpath => './div/*')
+
+File.open('dump', 'w') do |f|
+  content.each do |item|
+    f.puts item.html
+  end
+end
+
+puts 'dumped the content'
+
+
+
+sleep 5
+
+browser.close
+
-- 
2.11.4.GIT