Start working with Class::DBI and make file layout a little more sane.
[kamitsukai.git] / scripts / isbn_lookup.pl
blob0036372a8a7ec5cf26260232e0d80397583e00d0
1 #! /usr/bin/perl
3 use strict;
4 use LWP::Simple qw($ua get);
5 use XML::XPath;
6 use DBI;
8 #use Data::Dumper;
10 # Define parts of the DB connection.
11 my $database = "";
12 my $hostname = "";
13 my $port = "3306";
14 my $user = "";
15 my $password = "";
17 my $dsn = "DBI:mysql:database=$database;host=$hostname;port=$port";
18 my $dbh = DBI->connect( $dsn, $user, $password );
20 # Define parts of the REST request.
21 my $baseurl = "http://ecs.amazonaws.com/onca/xml";
22 my $service = "AWSECommerceService";
23 my $accessKey = "";
24 my $operation = "ItemLookup";
25 my $searchindex = "Books";
26 my $responseGroups = "Request,ItemAttributes,Images";
27 my $associateTag = "kormilbar-20";
28 my $version = "2007-02-22";
30 #my ($lookup_type, @lookup_ids) = @{get_queue_items()} or die("Nothing to lookup.\n");
31 my ( $lookup_type, @lookup_ids ) = @{ get_queue_items() } or die();
33 #print join(', ', $lookup_type, @lookup_ids) . "\n";
35 #my $lookup_type = 'ISBN';
36 #my @lookup_ids = ('0061020710', '0061020702', '0380788624', '032144325X', '0441009425', '0312876351', '1576104656', '0345347951', '0765312662', '1400049628');
38 my $response = lookup_item( $lookup_type, @lookup_ids );
40 #print $response;
42 my @foundISBNs;
43 parse_xml($response);
45 cleanup_queue();
47 #refresh_old_data();
49 $dbh->disconnect();
51 sub lookup_item {
52 my ( $type, @ids ) = @_;
54 # Assemble the REST request URL.
55 my $request = "$baseurl?"
56 . "Service=$service&"
57 . "AWSAccessKeyId=$accessKey&"
58 . "Operation=$operation&"
59 . "IdType=$type&"
60 . "ItemId="
61 . join( ',', @ids ) . "&"
62 . "SearchIndex=$searchindex&"
63 . "ResponseGroup=$responseGroups&"
64 . "AssociateTag=$associateTag&"
65 . "Version=$version";
67 #print "Request: " . $request . "\n";
69 # Send the request using HTTP GET.
70 my $ua = new LWP::UserAgent;
71 $ua->timeout(30);
72 my $response = get($request);
74 return $response;
75 } ## end sub lookup_item
77 sub get_queue_items {
78 my @ids;
79 my $type = undef;
81 #my $sql_lookup = 'SELECT `id`, `type` FROM `library_queue_amazon` WHERE `type` = (SELECT `type` FROM `library_queue_amazon` ORDER BY `inserted` DESC LIMIT 1) ORDER BY `inserted` DESC LIMIT 10';
82 my $sql_lookup = 'SELECT `id`, `type` FROM `library_queue_amazon` WHERE `type` = (SELECT `type` FROM `library_queue_amazon` LIMIT 1) LIMIT 10';
84 my $sth = $dbh->prepare($sql_lookup);
86 $sth->execute();
88 my $sthRemoveQueue = $dbh->prepare("DELETE FROM `library_queue_amazon` WHERE `id` = ? AND `type` = ? LIMIT 1");
90 while ( my @resultRow = $sth->fetchrow_array() ) {
91 push @ids, $resultRow[0];
92 $type = $resultRow[1];
93 $sthRemoveQueue->execute( $resultRow[0], $type );
96 if ( defined($type) ) {
97 my @ret;
98 push @ret, $type;
99 push @ret, @ids;
101 return \@ret;
102 } else {
103 return undef;
105 } ## end sub get_queue_items
107 sub parse_xml {
108 my $response = shift(@_);
110 my $xp = XML::XPath->new( xml => $response );
112 my $sthGetUUID = $dbh->prepare('SELECT UUID()');
113 my $sthInsertBook =
114 $dbh->prepare( 'INSERT INTO `library_book_info` '
115 . '(`id`,`isbn`,`ean`,`title`,`binding`,`deweydecimal`,`publisher`,`published`,`released`,`studio`,`pages`,`height`,`heightunits`,`length`,`lengthunits`,`width`,`widthunits`,`weight`,`weightunits`,`asin`,`detailpageurl`) '
116 . 'VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)' );
117 my $sthInsertImage = $dbh->prepare('INSERT INTO `library_amazon_images` (`asin`,`category`,`size`,`width`,`height`,`url`) VALUES (?,?,?,?,?,?)');
118 my $sthInsertAuthor = $dbh->prepare('INSERT INTO `library_book_authors` (`id`,`author`) VALUES (?,?)');
119 my $i = 1;
120 while ( $xp->find("/ItemLookupResponse/Items/Item[$i]") ) {
121 $sthGetUUID->execute();
123 my $uuid = $sthGetUUID->fetchrow_array();
125 my $productGroup = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ItemAttributes/ProductGroup");
126 $productGroup = 'NULL' if ( !defined($productGroup) );
128 my $title = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ItemAttributes/Title");
129 $title = 'NULL' if ( !defined($title) );
131 my $isbn = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ItemAttributes/ISBN");
132 $isbn = 'NULL' if ( !defined($isbn) );
134 my $ean = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ItemAttributes/EAN");
135 $ean = 'NULL' if ( !defined($ean) );
137 my $binding = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ItemAttributes/Binding");
138 $binding = 'NULL' if ( !defined($binding) );
140 my $deweyDecimal = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ItemAttributes/DeweyDecimalNumber");
141 $deweyDecimal = 'NULL' if ( !defined($deweyDecimal) );
143 my $publisher = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ItemAttributes/Publisher");
144 $publisher = 'NULL' if ( !defined($publisher) );
146 my $published = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ItemAttributes/PublicationDate");
147 $published = 'NULL' if ( !defined($published) );
149 my $released = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ItemAttributes/ReleaseDate");
150 $released = 'NULL' if ( !defined($released) );
152 my $studio = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ItemAttributes/Studio");
153 $studio = 'NULL' if ( !defined($studio) );
155 my $pages = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ItemAttributes/NumberOfPages");
156 $pages = 'NULL' if ( !defined($pages) );
158 my $height = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ItemAttributes/PackageDimensions/Height");
159 $height = 'NULL' if ( !defined($height) );
161 my $heightUnits = $xp->findvalue( "/ItemLookupResponse/Items/Item[$i]/ItemAttributes/PackageDimensions/Height/" . '@Units' );
162 $heightUnits = 'NULL' if ( !defined($heightUnits) );
164 my $length = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ItemAttributes/PackageDimensions/Length");
165 $length = 'NULL' if ( !defined($length) );
167 my $lengthUnits = $xp->findvalue( "/ItemLookupResponse/Items/Item[$i]/ItemAttributes/PackageDimensions/Length/" . '@Units' );
168 $lengthUnits = 'NULL' if ( !defined($lengthUnits) );
170 my $width = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ItemAttributes/PackageDimensions/Width");
171 $width = 'NULL' if ( !defined($width) );
173 my $widthUnits = $xp->findvalue( "/ItemLookupResponse/Items/Item[$i]/ItemAttributes/PackageDimensions/Width/" . '@Units' );
174 $widthUnits = 'NULL' if ( !defined($widthUnits) );
176 my $weight = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ItemAttributes/PackageDimensions/Weight");
177 $weight = 'NULL' if ( !defined($weight) );
179 my $weightUnits = $xp->findvalue( "/ItemLookupResponse/Items/Item[$i]/ItemAttributes/PackageDimensions/Weight/" . '@Units' );
180 $weightUnits = 'NULL' if ( !defined($weightUnits) );
182 my $asin = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ASIN");
183 $asin = 'NULL' if ( !defined($asin) );
185 my $detailPageURL = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/DetailPageURL");
186 $detailPageURL = 'NULL' if ( !defined($detailPageURL) );
188 $sthInsertBook->execute(
189 $uuid, $isbn, $ean, $title, $binding, $deweyDecimal, $publisher,
190 $published, $released, $studio, $pages, $height, $heightUnits, $length,
191 $lengthUnits, $width, $widthUnits, $weight, $weightUnits, $asin, $detailPageURL
194 my @authors;
195 for ( my $j = 1 ; $j <= $xp->findvalue("count(/ItemLookupResponse/Items/Item[$i]/ItemAttributes/Author)") ; $j++ ) {
196 $sthInsertAuthor->execute( $uuid, $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ItemAttributes/Author[$j]") );
197 push( @authors, $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ItemAttributes/Author[$j]") );
200 push( @foundISBNs, $isbn );
202 # print "\n\nItem#: $i\n";
203 # print "Title: $title\n";
204 # print "Author(s): " . join(', ', @authors) . "\n";
205 # print "ISBN: $isbn\n";
206 # print "EAN: $ean\n";
207 # print "Binding: $binding\n";
208 # print "Dewey Decimal: $deweyDecimal\n";
209 # print "Publisher: $publisher\n";
210 # print "Published: $published\n";
211 # print "Released: $released\n";
212 # print "Studio: $studio\n";
213 # print "Pages: $pages\n";
214 # print "Height: $height $heightUnits\n";
215 # print "Length: $length $lengthUnits\n";
216 # print "Width: $width $widthUnits\n";
217 # print "Weight: $weight $weightUnits\n";
218 # print "ASIN: $asin\n";
219 # print "Detail Page: $detailPageURL\n";
221 my %categories;
222 for ( my $j = 1 ; $j <= $xp->findvalue("count(/ItemLookupResponse/Items/Item[$i]/ImageSets/ImageSet)") ; $j++ ) {
223 my $category = $xp->findvalue( "/ItemLookupResponse/Items/Item[$i]/ImageSets/ImageSet[$j]/" . '@Category' );
224 my $smallLink = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ImageSets/ImageSet[$j]/SmallImage/URL");
225 my $smallHeight = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ImageSets/ImageSet[$j]/SmallImage/Height");
226 my $smallWidth = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ImageSets/ImageSet[$j]/SmallImage/Width");
227 my $mediumLink = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ImageSets/ImageSet[$j]/MediumImage/URL");
228 my $mediumHeight = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ImageSets/ImageSet[$j]/MediumImage/Height");
229 my $mediumWidth = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ImageSets/ImageSet[$j]/MediumImage/Width");
230 my $largeLink = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ImageSets/ImageSet[$j]/LargeImage/URL");
231 my $largeHeight = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ImageSets/ImageSet[$j]/LargeImage/Height");
232 my $largeWidth = $xp->findvalue("/ItemLookupResponse/Items/Item[$i]/ImageSets/ImageSet[$j]/LargeImage/Width");
234 $categories{$category}++;
235 if ( $categories{$category} > 1 ) {
236 $sthInsertImage->execute( $asin, $category . $categories{$category}, 'small', $smallWidth, $smallHeight, $smallLink );
237 $sthInsertImage->execute( $asin, $category . $categories{$category}, 'medium', $mediumWidth, $mediumHeight, $mediumLink );
238 $sthInsertImage->execute( $asin, $category . $categories{$category}, 'large', $largeWidth, $largeHeight, $largeLink );
239 } else {
240 $sthInsertImage->execute( $asin, $category, 'small', $smallWidth, $smallHeight, $smallLink );
241 $sthInsertImage->execute( $asin, $category, 'medium', $mediumWidth, $mediumHeight, $mediumLink );
242 $sthInsertImage->execute( $asin, $category, 'large', $largeWidth, $largeHeight, $largeLink );
245 # if($categories{$category} > 1) {
246 # print "Image Category: $category$categories{$category}\n";
247 # } else {
248 # print "Image Category: $category\n";
250 # print "Small ($smallWidth x $smallHeight): $smallLink\n";
251 # print "Medium ($mediumWidth x $mediumHeight): $mediumLink\n";
252 # print "Large ($largeWidth x $largeHeight): $largeLink\n";
253 } ## end for ( my $j = 1 ; $j <=...
255 $i++;
256 } ## end while ( $xp->find("/ItemLookupResponse/Items/Item[$i]"...
257 } ## end sub parse_xml
259 sub cleanup_queue {
260 my @union;
261 my @intersection;
262 my @difference;
264 @union = @intersection = @difference = ();
265 my %count = ();
266 foreach my $element ( @lookup_ids, @foundISBNs ) { $count{$element}++ }
267 foreach my $element ( keys %count ) {
268 push @union, $element;
269 push @{ $count{$element} > 1 ? \@intersection : \@difference }, $element;
272 my $sthAddFailedQueue = $dbh->prepare("INSERT INTO `library_queue_amazon_failed` (`id`,`type`) VALUES (?,?)");
274 foreach my $id (@difference) {
276 # print "Lookup of $lookup_type $id failed.\n";
277 $sthAddFailedQueue->execute( $id, $lookup_type );
279 } ## end sub cleanup_queue
281 sub refresh_old_data {
283 # my $sthGetStaleBooks = $dbh->prepare("SELECT `isbn`,`ean`,`asin` FROM");