changes all issue tracking in preparation for switch to github issues
[bioperl-live.git] / Bio / DB / MeSH.pm
blob45da4d4e1c29b302d8633437dbc7410e250ce01e
2 # BioPerl module for Bio::DB::MeSH
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
8 # You may distribute this module under the same terms as perl itself
10 # POD documentation - main docs before the code
12 =head1 NAME
14 Bio::DB::MeSH - Term retrieval from a Web MeSH database
16 =head1 SYNOPSIS
18 my $mesh = Bio::DB::MeSH->new();
19 my $term = $mesh->get_exact_term('Butter');
20 print $term->description;
22 =head1 DESCRIPTION
24 This class retrieves a term from the Medical Subject Headings database
25 by the National Library of Medicine of USA.
26 See L<http://www.nlm.nih.gov/mesh/meshhome.html>.
28 This class implements L<Bio::SimpleAnalysisI> and wraps its methods under
29 L<get_exact_term>.
31 By default, web access uses L<WWW::Mechanize>, but in its absence
32 falls back to bioperl module L<Bio::WebAgent> which is a subclass of
33 L<LWP::UserAgent>. If not even that is not installed, it uses
34 L<Bio::Root::HTTPget>.
36 =head1 SEE ALSO
38 L<Bio::Phenotype::MeSH::Term>
40 =head1 FEEDBACK
42 =head2 Mailing Lists
44 User feedback is an integral part of the evolution of this and other
45 Bioperl modules. Send your comments and suggestions preferably to the
46 Bioperl mailing lists Your participation is much appreciated.
48 bioperl-l@bioperl.org - General discussion
49 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
51 =head2 Support
53 Please direct usage questions or support issues to the mailing list:
55 I<bioperl-l@bioperl.org>
57 rather than to the module maintainer directly. Many experienced and
58 reponsive experts will be able look at the problem and quickly
59 address it. Please include a thorough description of the problem
60 with code and data examples if at all possible.
62 =head2 Reporting Bugs
64 report bugs to the Bioperl bug tracking system to help us keep track
65 the bugs and their resolution. Bug reports can be submitted via the
66 web:
68 https://github.com/bioperl/bioperl-live/issues
70 =head1 AUTHOR
72 Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
74 =head1 APPENDIX
76 The rest of the documentation details each of the object
77 methods. Internal methods are usually preceded with a _
79 =cut
82 # Let the code begin...
85 package Bio::DB::MeSH;
86 use strict;
88 use Bio::Phenotype::MeSH::Term;
89 use Bio::Phenotype::MeSH::Twig;
91 use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);
94 my $URL = 'http://www.nlm.nih.gov/mesh/MBrowser.html';
96 my $ANALYSIS_SPEC= {name => 'MeSH term retrival',
97 type => 'Entry retrieval'};
98 my $INPUT_SPEC = [
99 {mandatory=>'true',
100 type => 'scalar',
101 'name'=> 'value',
105 my $RESULT_SPEC =
107 '' => 'Bio::Phenotype::MeSH::Term',
108 raw => 'raw output',
112 sub _init {
113 my $self = shift;
114 $self->url($URL);
115 $self->{'_ANALYSIS_SPEC'} =$ANALYSIS_SPEC;
116 $self->{'_INPUT_SPEC'} =$INPUT_SPEC;
117 $self->{'_RESULT_SPEC'} =$RESULT_SPEC;
118 $self->{'_ANALYSIS_NAME'} = $ANALYSIS_SPEC->{'name'};
119 $self->_webmodule;
120 return $self;
123 sub _webmodule {
124 my ($self) = shift;
125 $self->{'_webmodule'} = '';
126 eval {
127 require WWW::Mechanize;
129 unless ($@) {
130 $self->{'_webmodule'} = 'WWW::Mechanize';
131 return;
133 eval {
134 require LWP::UserAgent;
136 unless ($@) {
137 $self->{'_webmodule'} = 'Bio::WebAgent';
138 return;
140 require Bio::Root::HTTPget;
141 $self->{'_webmodule'} = 'Bio::Root::HTTPget';
145 =head2 get_exact_term
147 Title : get_exact_term
148 Usage : $s = $db->get_exact_term($value);
149 Function: Retrive a single MeSH term using a unique ID or exact name.
150 Example :
151 Returns : a Bio::Phenotype::MeSH::Term object
152 Args : scalar, UID or name of a MeSH term
154 The returned term object contains information about the immediate
155 vincinity of the term in the terminology hierarchy. See
156 L<Bio::Phenotype::MeSH::Twig>.
158 =cut
161 sub get_exact_term {
162 my ($self, $value) = @_;
163 $self->{'_term'} = undef;
164 $self->run($value) if $value;
165 $self->throw("Could not connect to the server")
166 unless $self->status eq 'COMPLETED';
167 return $self->result;
171 sub run {
172 my ($self, $value) = @_;
174 # check input
175 $self->throw("Need a MeSH name or ID as an input [$value]") if ref $value;
177 # internal run()
178 $self->_run($value);
182 sub _cgi_url {
183 my($self, $field, $term) = @_;
184 # we don't bother to URI::Escape $field and $term as this is an untainted private sub
185 return 'http://www.nlm.nih.gov/cgi/mesh/2003/MB_cgi?field='.$field.'&term='.$term;
189 sub _run {
190 my ($self, $value) = @_;
191 $self->throw('Need a value [$value]')
192 unless $value;;
193 # delay repeated calls by default by 3 sec, set delay() to change
194 # $self->sleep;
196 $self->status('TERMINATED_BY_ERROR');
198 if ($self->{'_webmodule'} eq 'WWW::Mechanize') {
199 $self->debug("using WWW::Mechanize...\n");
200 my $agent = WWW::Mechanize->new();
201 $agent->get($self->url);
202 $agent->status == 200
203 or $self->warn("Could not connect to the server\n") and return;
205 $agent->form_name('MB');
207 $agent->field("term", $value);
208 if ($value =~ /\w\d{6}/) {
209 $agent->field("field", 'uid');
210 } else {
211 $agent->field("field", 'entry');
213 $agent->click("exact");
215 $self->{'_content'} = $agent->content();
216 $self->status('COMPLETED');
217 return;
219 elsif ($self->{'_webmodule'} eq 'Bio::WebAgent') {
220 $self->debug("using LWP::UserAgent...\n");
221 my $response;
222 if ($value =~ /\w\d{6}/) {
223 $self->{'_content'} =
224 $response = eval {
225 $self->get( $self->_cgi_url('uid', $value) )
227 $self->warn("Could not connect to the server\n") and return
228 if $@;
229 } else {
230 $self->{'_content'} =
231 eval {
232 $response = $self->get( $self->_cgi_url('entry', $value) )
234 $self->warn("Could not connect to the server\n") and return
235 if $@;
237 if ($response->is_success) {
238 $self->{'_content'} = $response->content;
239 $self->status('COMPLETED');
241 return;
242 } else {
243 $self->debug("using Bio::Root::HTTPget...\n");
244 my $agent = Bio::Root::HTTPget->new();
245 if ($value =~ /\w\d{6}/) {
246 $self->{'_content'} =
247 eval {
248 $agent->get( $self->_cgi_url('uid', $value) )
250 $self->warn("Could not connect to the server\n") and return
251 if $@;
252 } else {
253 $self->{'_content'} =
254 eval {
255 $agent->get( $self->_cgi_url('entry', $value) )
257 $self->debug("Could not connect to the server\n") and return
258 if $@;
260 $self->status('COMPLETED');
264 sub result {
265 my ($self,$value) = @_;
267 $self->throw("Could not retrive results") unless $self->status('COMPLETED');
269 # no processing
270 return $self->{'_content'} if $value && $value eq 'raw';
273 # create a MeSH::Term object
274 $_ = $self->{'_content'};
275 $self->debug( substr($_, 0, 100) . "\n");
276 my ($id) = m|Unique \s+ ID </TH>
277 <TD (?: \s+ [^>]+ )? >
278 (.*?) # id
279 </TD> |ix;
280 my ($name) = m|MeSH \s+ Heading </TH>
281 <TD (?: \s+ [^>]+ )? >
282 (.*?) # name
283 </TD> |ix;
284 my ($desc) = m|Scope \s+ Note </TH>
285 <TD (?: \s+ [^>]+ )? >
286 (.*?) # desc
287 </TD>|isx;
288 $self->throw("No description returned: $_") unless defined $desc;
289 $desc =~ s/<.*?>//sg;
290 $desc =~ s/(?:\r)?\n/ /g;
291 $desc =~ s/\( +/\(/g;
292 $desc =~ s/ {2,}/ /g;
294 my $term = Bio::Phenotype::MeSH::Term->new(-id => $id,
295 -name => $name,
296 -description => $desc
298 my ($trees) = $self->{'_content'} =~ /MeSH Tree Structures(.*)/s;
300 while (m|Entry Term</TH><TD(?: [^>]+)?>(.*?)</TD>|ig) {
301 $term->add_synonym($1);
302 $self->debug("Synonym: |$1|\n");
305 foreach (split /<HR>/i, $trees ) {
306 next unless /$name/;
307 s/<TD.*?>/ /sgi;
308 s/<.*?>//sg;
309 s/&nbsp;/ /sg;
310 #print "|$_|";
311 my ($treeno) = /$name \[([^]]+)]/;
312 my ($parent_treeno) = $treeno =~ /(.*)\.\d{3}/;
313 my ($parent) = /\n +(\w.+) \[$parent_treeno\]/;
315 my $twig = Bio::Phenotype::MeSH::Twig->new(-parent => $parent);
316 $term->add_twig($twig);
318 $self->debug("Parent: |$parent|\n");
319 while (/\n +(\w.+) \[$treeno\./g ) {
320 $twig->add_child($1);
321 $self->debug("Child: |$1|\n");
324 while (/\n +(\w.+) \[$parent_treeno\./g ) {
325 next if $name eq $1;
326 $twig->add_sister($1);
327 $self->debug("Sister: |$1|\n");
330 return $term;