Bug 15774: Use Koha::Object(s) for additional fields
[koha.git] / C4 / External / Syndetics.pm
blob0570d0f7d6b96fd4380894ac0ab492b557373a12
1 package C4::External::Syndetics;
2 # Copyright (C) 2006 LibLime
3 # <jmf at liblime dot com>
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
20 use XML::Simple;
21 use XML::LibXML;
22 use LWP::Simple;
23 use LWP::UserAgent;
24 use HTTP::Request::Common;
26 use strict;
27 use warnings;
29 use vars qw(@ISA @EXPORT);
BEGIN {
    # Pull in Exporter at compile time and publish the public fetch helpers.
    # @ISA and @EXPORT are the package variables declared by the
    # "use vars" line that precedes this block.
    require Exporter;
    @ISA    = ('Exporter');
    @EXPORT = (
        '&get_syndetics_index',
        '&get_syndetics_summary',
        '&get_syndetics_toc',
        '&get_syndetics_editions',
        '&get_syndetics_excerpt',
        '&get_syndetics_reviews',
        '&get_syndetics_anotes',
    );
}

# File-scoped XML::LibXML parser, created once and reused by the subs in
# this file that extract values with XPath.
my $parser = XML::LibXML->new();
48 =head1 NAME
50 C4::External::Syndetics - Functions for retrieving Syndetics content in Koha
52 =head1 FUNCTIONS
54 This module provides facilities for retrieving Syndetics.com content in Koha
56 =head2 get_syndetics_summary
58 my $syndetics_summary= &get_syndetics_summary( $isbn );
60 Get Summary data from Syndetics
62 =cut
# get_syndetics_index( $isbn, $upc, $oclc )
#
# Fetches INDEX.XML for the given identifiers and reports which of the
# known Syndetics content types the parsed document lists. A type counts
# as listed when the parsed document has a key with that name whose value
# matches the name.
#
# Returns a hashref mapping each listed type name to itself, or undef when
# none of the known types is listed. Returns nothing when no identifier was
# supplied, when the response is not XML, when the body is empty, or when
# the body cannot be parsed (a warning is emitted in the last case, in the
# same way get_syndetics_summary reports parse failures).
sub get_syndetics_index {
    my ( $isbn, $upc, $oclc ) = @_;

    return unless ( $isbn || $upc || $oclc );

    my $response = _fetch_syndetics_content( 'INDEX.XML', $isbn, $upc, $oclc );
    return unless $response->content_type =~ /xml/;

    # Nothing to parse: stop here rather than probing the HTTP response
    # object as if it were the parsed document.
    my $content = $response->content;
    return unless $content;

    # XMLin dies on malformed XML; trap that so a bad payload does not
    # propagate as an exception to the caller.
    my $index = eval { XML::Simple->new()->XMLin($content) };
    if ($@) {
        warn "Error parsing Syndetics INDEX.XML: $@";
        return;
    }

    # Only a hash can be probed by key; anything else lists no types.
    return unless ref($index) eq 'HASH';

    my $syndetics_elements;
    for my $available_type (
        'SUMMARY',   'TOC',       'FICTION',   'AWARDS1',
        'SERIES1',   'SPSUMMARY', 'SPREVIEW',  'AVPROFILE',
        'AVSUMMARY', 'DBCHAPTER', 'LJREVIEW',  'PWREVIEW',
        'SLJREVIEW', 'CHREVIEW',  'BLREVIEW',  'HBREVIEW',
        'KIREVIEW',  'CRITICASREVIEW', 'ANOTES'
      )
    {
        if ( exists $index->{$available_type}
            && $index->{$available_type} =~ /$available_type/ )
        {
            $syndetics_elements->{$available_type} = $available_type;
        }
    }

    return $syndetics_elements;
}
# get_syndetics_summary( $isbn, $upc, $oclc, $syndetics_elements )
#
# Fetches the summary document for the given identifiers and returns the
# text found at the XPath //Fld520. AVSUMMARY.XML is requested when
# $syndetics_elements has an AVSUMMARY key, SUMMARY.XML otherwise.
#
# Returns nothing when the response is not XML. A parse failure is
# reported with warn and leaves the summary unset.
sub get_syndetics_summary {
    my ( $isbn, $upc, $oclc, $syndetics_elements ) = @_;

    my $summary_type = 'SUMMARY.XML';
    $summary_type = 'AVSUMMARY.XML'
      if exists( $syndetics_elements->{'AVSUMMARY'} );

    my $response =
      _fetch_syndetics_content( $summary_type, $isbn, $upc, $oclc );
    return if $response->content_type !~ /xml/;

    my $xml = $response->content;

    my $summary;
    eval {
        # findvalue yields the text content of the matching node(s).
        $summary = $parser->parse_string($xml)->findvalue('//Fld520');
    };
    warn "Error parsing Syndetics $summary_type" if $@;

    return $summary;
}
# get_syndetics_toc( $isbn, $upc, $oclc )
#
# Fetches TOC.XML for the given identifiers and returns a reference to the
# list of Fld970 entries found under VarFlds/VarDFlds/SSIFlds in the parsed
# document (Fld970 is always forced into a list by the parser options).
#
# Returns nothing when the response is not XML, when the body is empty, or
# when the body cannot be parsed (a warning is emitted in the last case).
sub get_syndetics_toc {
    my ( $isbn, $upc, $oclc ) = @_;

    my $response = _fetch_syndetics_content( 'TOC.XML', $isbn, $upc, $oclc );
    return unless $response->content_type =~ /xml/;

    # Without a body there is no document to walk; bail out instead of
    # dereferencing (and autovivifying keys inside) the HTTP response object.
    my $content = $response->content;
    return unless $content;

    my $toc;
    eval {
        my $data = XML::Simple->new()->XMLin(
            $content,
            forcearray => [ qw(Fld970) ],
        );
        $toc = \@{ $data->{VarFlds}->{VarDFlds}->{SSIFlds}->{Fld970} }
          if $data;
    };
    if ($@) {
        # XMLin dies on malformed XML; report it the way the LibXML-based
        # subs in this file do rather than letting it escape.
        warn "Error parsing Syndetics TOC.XML: $@";
        return;
    }

    return $toc;
}
# get_syndetics_excerpt( $isbn, $upc, $oclc )
#
# Fetches DBCHAPTER.XML for the given identifiers, collects the Fld520
# entries found under VarFlds/VarDFlds/Notes in the parsed document and
# returns them re-serialised as an XML string by XMLout with NoEscape set.
#
# Returns nothing when the response is not XML, when the body is empty, or
# when the body cannot be processed (a warning is emitted in the last case).
sub get_syndetics_excerpt {
    my ( $isbn, $upc, $oclc ) = @_;

    my $response =
      _fetch_syndetics_content( 'DBCHAPTER.XML', $isbn, $upc, $oclc );
    return unless $response->content_type =~ /xml/;

    # Without a body there is no document to walk; bail out instead of
    # dereferencing (and autovivifying keys inside) the HTTP response object.
    my $content = $response->content;
    return unless $content;

    my $excerpt_xml;
    eval {
        my $data = XML::Simple->new()->XMLin(
            $content,
            forcearray => [ qw(Fld520) ],
        );
        if ($data) {
            my $excerpt = \@{ $data->{VarFlds}->{VarDFlds}->{Notes}->{Fld520} };
            $excerpt_xml = XMLout( $excerpt, NoEscape => 1 );
        }
    };
    if ($@) {
        # XMLin dies on malformed XML; report it the way the LibXML-based
        # subs in this file do rather than letting it escape.
        warn "Error parsing Syndetics DBCHAPTER.XML: $@";
        return;
    }

    return $excerpt_xml;
}
# get_syndetics_reviews( $isbn, $upc, $oclc, $syndetics_elements )
#
# Walks the known review sources in a fixed order. A source is fetched only
# when $syndetics_elements has a true entry for its element name and the
# element name matches that entry. For each fetched XML document the text
# at //Fld520 is extracted.
#
# Always returns an arrayref; each item has the form
#   { title => <source title>, reviews => [ { content => <text> } ] }
# Sources that are skipped, not XML, empty, or unparsable add nothing
# (a parse failure is reported with warn).
sub get_syndetics_reviews {
    my ( $isbn, $upc, $oclc, $syndetics_elements ) = @_;

    my @sources = (
        { title => 'Library Journal Review',        file => 'LJREVIEW.XML',       element => 'LJREVIEW' },
        { title => 'Publishers Weekly Review',      file => 'PWREVIEW.XML',       element => 'PWREVIEW' },
        { title => 'School Library Journal Review', file => 'SLJREVIEW.XML',      element => 'SLJREVIEW' },
        { title => 'CHOICE Review',                 file => 'CHREVIEW.XML',       element => 'CHREVIEW' },
        { title => 'Booklist Review',               file => 'BLREVIEW.XML',       element => 'BLREVIEW' },
        { title => 'Horn Book Review',              file => 'HBREVIEW.XML',       element => 'HBREVIEW' },
        { title => 'Kirkus Book Review',            file => 'KIREVIEW.XML',       element => 'KIREVIEW' },
        { title => 'Criticas Review',               file => 'CRITICASREVIEW.XML', element => 'CRITICASREVIEW' },
        { title => 'Spanish Review',                file => 'SPREVIEW.XML',       element => 'SPREVIEW' },
    );

    my @reviews;

    SOURCE:
    for my $src (@sources) {
        my $element = $src->{element};
        my $title   = $src->{title};

        # Skip sources that the index did not report as available.
        next SOURCE
          unless ( $syndetics_elements->{$element}
            and $element =~ $syndetics_elements->{$element} );

        my $response =
          _fetch_syndetics_content( $src->{file}, $isbn, $upc, $oclc );
        next SOURCE if $response->content_type !~ /xml/;

        my $xml = $response->content;

        eval {
            my $doc = $parser->parse_string($xml);

            # findvalue returns text only, so any markup embedded in the
            # review (including <B>/<I> formatting) is dropped. That evens
            # out differences between the output variants of the service
            # and strips questionable HTML.
            my $text = $doc->findvalue('//Fld520');
            if ($text) {
                push @reviews,
                  { title => $title, reviews => [ { content => $text } ] };
            }
        };
        warn "Error parsing Syndetics $title review" if $@;
    }

    return \@reviews;
}
# get_syndetics_editions( $isbn, $upc, $oclc )
#
# Fetches FICTION.XML for the given identifiers and returns a reference to
# the list of Fld020 entries found under VarFlds/VarDFlds/NumbCode in the
# parsed document (Fld020 is always forced into a list by the parser
# options).
#
# Returns nothing when the response is not XML, when the body is empty, or
# when the body cannot be parsed (a warning is emitted in the last case).
sub get_syndetics_editions {
    my ( $isbn, $upc, $oclc ) = @_;

    my $response =
      _fetch_syndetics_content( 'FICTION.XML', $isbn, $upc, $oclc );
    return unless $response->content_type =~ /xml/;

    # Without a body there is no document to walk; bail out instead of
    # dereferencing (and autovivifying keys inside) the HTTP response object.
    my $content = $response->content;
    return unless $content;

    my $similar_items;
    eval {
        my $data = XML::Simple->new()->XMLin(
            $content,
            forcearray => [ qw(Fld020) ],
        );
        $similar_items =
          \@{ $data->{VarFlds}->{VarDFlds}->{NumbCode}->{Fld020} }
          if $data;
    };
    if ($@) {
        # XMLin dies on malformed XML; report it the way the LibXML-based
        # subs in this file do rather than letting it escape.
        warn "Error parsing Syndetics FICTION.XML: $@";
        return;
    }

    return $similar_items;
}
# get_syndetics_anotes( $isbn, $upc, $oclc )
#
# Fetches ANOTES.XML for the given identifiers and collects the content of
# subfield "a" of every Fld980 entry found under VarFlds/VarDFlds/SSIFlds
# in the parsed document.
#
# Returns nothing when the response is not XML. Otherwise returns an
# arrayref of { content => <value> } hashes; the list is empty when the
# body is empty, and holds whatever was collected before the failure when
# the body cannot be processed (a warning is emitted in that case).
sub get_syndetics_anotes {
    my ( $isbn, $upc, $oclc ) = @_;

    my $response = _fetch_syndetics_content( 'ANOTES.XML', $isbn, $upc, $oclc );
    return unless $response->content_type =~ /xml/;

    my @anotes;

    # Without a body there is no document to walk; return the empty list
    # instead of dereferencing (and autovivifying keys inside) the HTTP
    # response object.
    my $content = $response->content;
    return \@anotes unless $content;

    eval {
        my $data = XML::Simple->new()->XMLin(
            $content,
            forcearray   => [ qw(Fld980) ],
            ForceContent => 1,
        );

        for my $fld980 ( @{ $data->{VarFlds}->{VarDFlds}->{SSIFlds}->{Fld980} } ) {
            my $note = $fld980->{a}->{content};

            # The content sometimes serializes as a list of values and
            # sometimes as a single value; flatten both into one entry each.
            if ( ref($note) eq 'ARRAY' ) {
                push @anotes, { content => $_ } for @{$note};
            }
            else {
                push @anotes, { content => $note };
            }
        }
    };
    if ($@) {
        # XMLin dies on malformed XML; report it the way the LibXML-based
        # subs in this file do rather than letting it escape.
        warn "Error parsing Syndetics ANOTES.XML: $@";
    }

    return \@anotes;
}
# _syndetics_uri_escape( $value )
#
# Percent-encodes $value for use as a single query-string value: every
# octet other than an ASCII letter, digit, "-", ".", "_" or "~" becomes
# %XX. Character strings are converted to UTF-8 octets first. An undefined
# value yields the empty string.
sub _syndetics_uri_escape {
    my ($value) = @_;

    return '' unless defined $value;

    # Escape octets, not characters, so each escape stays two hex digits.
    utf8::encode($value) if utf8::is_utf8($value);
    $value =~ s/([^A-Za-z0-9\-._~])/sprintf('%%%02X', ord($1))/ge;

    return $value;
}

# _fetch_syndetics_content( $element, $isbn, $upc, $oclc )
#
# Builds the Syndetics request URL for document $element (for example
# 'SUMMARY.XML') from the given identifiers and the SyndeticsClientCode
# system preference, and performs an HTTP GET with a 10 second timeout,
# honouring proxy settings from the environment.
#
# Undefined identifiers are sent as empty values. Identifiers and the
# client code are percent-encoded so that characters such as "&", "#" or
# "/" inside a value cannot alter the structure of the query string.
#
# Always returns the HTTP::Response object; a warning is emitted when the
# response has no content. Callers are expected to inspect the response
# themselves.
sub _fetch_syndetics_content {
    my ( $element, $isbn, $upc, $oclc ) = @_;

    my $client_code = C4::Context->preference('SyndeticsClientCode');

    # The escape helper also maps undef to '', which keeps the original
    # "missing identifier means empty parameter" behaviour.
    $isbn        = _syndetics_uri_escape($isbn);
    $upc         = _syndetics_uri_escape($upc);
    $oclc        = _syndetics_uri_escape($oclc);
    $client_code = _syndetics_uri_escape($client_code);

    # $element is a literal supplied by the callers in this file and is
    # left as-is; the "/" between the ISBN and the element is part of the
    # request format and must not be encoded.
    my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/$element&client=$client_code&type=xw10&upc=$upc&oclc=$oclc";

    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);
    $ua->env_proxy;
    my $response = $ua->get($url);

    warn "could not retrieve $url" unless $response->content;
    return $response;
}
274 __END__
276 =head1 NOTES
278 =cut
280 =head1 AUTHOR
282 Joshua Ferraro <jmf@liblime.com>
284 =cut