2 # BioPerl module for Bio::DB::SeqVersion::gi
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Brian Osborne
8 # Copyright Brian Osborne 2006
10 # You may distribute this module under the same terms as Perl itself
12 # POD documentation - main docs before the code
16 Bio::DB::SeqVersion::gi - interface to NCBI Sequence Revision History page
20 Do not use this module directly, use Bio::DB::SeqVersion.
22 use Bio::DB::SeqVersion;
24 my $query = Bio::DB::SeqVersion->new(-type => 'gi');
26 # all GIs, which will include the GI used to query
27 my @all_gis = $query->get_all(2);
29 # the most recent GI, which may or may not be the GI used to query
30 my $live_gi = $query->get_recent(2);
32 # get all the visible data on the Sequence Revision page
33 my $array_ref = $query->get_history(11111111);
35 These methods can also take accession numbers as arguments, just like
36 the Sequence Revision page itself.
40 All sequence entries at GenBank are identified by a pair of
41 identifiers, an accession and a numeric identifier, and this number is
42 frequently called a GI number (B<G>enInfo B<I>dentifier). The accession
43 is stable, but each new version of the sequence entry for the accession
44 receives a new GI number (see L<http://www.ncbi.nlm.nih.gov/Sitemap/sequenceIDs.html>
45 for more information on GenBank identifiers). One accession
46 can have one or more GI numbers and the highest of these is the most recent,
49 Information on an accession and its associated GI numbers is available at
50 the Sequence Revision History page at NCBI,
51 L<http://www.ncbi.nlm.nih.gov/entrez/sutils/girevhist.cgi>; this information is
52 not available in file format. This module queries the Web page and retrieves GI
53 numbers and related data given an accession (e.g. NP_111111, A11111, P12345) or
54 a GI number (e.g. 2, 11111111) as query.
60 User feedback is an integral part of the evolution of this and other
61 Bioperl modules. Send your comments and suggestions preferably to one
62 of the Bioperl mailing lists. Your participation is much appreciated.
64 bioperl-l@bioperl.org - General discussion
65 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
69 Please direct usage questions or support issues to the mailing list:
71 I<bioperl-l@bioperl.org>
73 rather than to the module maintainer directly. Many experienced and
74 responsive experts will be able to look at the problem and quickly
75 address it. Please include a thorough description of the problem
76 with code and data examples if at all possible.
80 Report bugs to the Bioperl bug tracking system to help us keep track
81 of the bugs and their resolution. Bug reports can be submitted via the
84 https://redmine.open-bio.org/projects/bioperl/
86 =head1 AUTHOR - Brian Osborne
88 Email E<lt> osborne at optonline dot net E<gt>
92 Torsten Seemann - torsten.seemann AT infotech.monash.edu.au
96 The rest of the documentation details each of the object
97 methods. Internal methods are usually preceded with a _
101 # Let the code begin...
103 package Bio
::DB
::SeqVersion
::gi
;
105 use HTML
::TokeParser
;
107 use base
qw(Bio::DB::SeqVersion);
109 # Private class variables
111 my $URL = 'http://www.ncbi.nlm.nih.gov/nuccore/%s?report=girevhist';
116 Usage : $gb = Bio::DB::SeqVersion::gi->new
117 Function: Creates a new query object
118 Returns : New query object
123 my ( $class, @args ) = @_;
124 my $self = $class->SUPER::new
(@args);
132 Usage : my @gis = $q->get_all(2)
133 Function: Get all GI numbers given a GI number
134 Returns : An array of GI numbers, earliest GI number is the 0 element
135 Args : A single GI number (string)
140 my ( $self, $id ) = @_;
142 $id eq $self->{_last_id
}
143 ?
$ref = $self->{_last_result
}
144 : $ref = $self->get_history($id);
145 for my $row ( @
{$ref} ) {
154 Usage : my $newest_gi = $q->get_recent(2)
155 Function: Get most recent GI given a single GI
157 Args : A single GI number (string)
162 my ( $self, $id ) = @_;
164 $id eq $self->{_last_id
}
165 ?
$ref = $self->{_last_result
}
166 : $ref = $self->get_history($id);
173 Usage : my $ref = $query_obj->get_history($id)
174 Function: Queries the NCBI Revision page, gets the data from the HTML table
175 Returns : Reference to an array of arrays where element 0 refers to the most
176 recent version and the last element refers to the oldest version.
177 In the second dimension the elements are:
184 For example, to get the GI number of the first version:
186 $ref->[$#{$ref}]->[0]
188 To get the Update Date of the latest version:
192 Args : One identifier (string)
197 my ( $self, $id ) = @_;
198 my $html = $self->_get_request($id);
199 my ($ref, $status) = $self->_process_data($html);
201 # store the very last result in case some other methods
202 # are called using the same identifier
203 $self->{_last_result
} = $ref;
204 $self->{_last_id
} = $id;
211 Usage : my $html = $self->_get_request($id)
212 Function: GET using NCBI Revision page URL, uses Root::HTTPget
214 Args : One identifier (string)
219 my ( $self, $id ) = @_;
221 $self->throw("Must specify a single id to query") if ( !$id || ref($id) );
223 my $url = sprintf( $URL, $id );
224 my $response = $self->get($url);
225 if ( not $response->is_success ) {
226 $self->throw( "Can't query $url: " . $response->status_line . "\n" ."ID likely does not exist");
228 $self->debug( "Response is:\n", $response->content, "\n" );
229 return $response->content;
234 Title : _process_data
235 Usage : $self->_process_data($html)
236 Function: extract data from HTML
237 Args : HTML from Revision History page
238 Returns : reference to an array of arrays
243 my ( $self, $html ) = @_;
246 my ($table, $status);
248 my $p = HTML
::TokeParser
->new(\
$html);
249 while (my $token = $p->get_tag('td')) {
250 #print Dumper $token;
251 print $p->get_text."\n";;
253 #if ($html =~ /Current\s+status:\s+([a-z]+)<\/div>(<table.+)/xms) {
254 # ($status, $table) = ($1, $2);
256 # $self->throw("Could not parse 'Revision history' HTML table: \n$html")
258 #my (@rows) = $table =~ /<tr>(.+?)<\/tr>/g;
259 #shift @rows; # get rid of header
260 #for my $row (@rows) {
261 # my (@arr) = $row =~ />([^<>]+)/g;
262 # $table[$count] = \@arr;
265 #$self->throw("Bad table data: \n".join("\n",@rows)) unless @table > 1;
266 print Dumper \
@table;