2 # BioPerl module for Bio::DB::GenPept
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Aaron Mackey <amackey@virginia.edu>
8 # Copyright Aaron Mackey
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 # completely reworked by Jason Stajich to use Bio::DB::WebDBSeqI 2000-12-12
18 Bio::DB::GenPept - Database object interface to GenPept
22 $gb = Bio::DB::GenPept->new();
24 $seq = $gb->get_Seq_by_id('195055'); # Unique ID
28 $seq = $gb->get_Seq_by_acc('P09651.5'); # Accession Number
30 my $seqio = $gb->get_Stream_by_id(['195055', 'P09651.5']);
31 while( my $seq = $seqio->next_seq ) {
32 print "seq is ", $seq->display_id, "\n";
37 Allows the dynamic retrieval of Sequence objects (Bio::Seq) from the GenPept
38 database at NCBI, via an Entrez query.
40 WARNING: Please do NOT spam the Entrez web server with multiple requests.
41 NCBI offers Batch Entrez for this purpose. Batch Entrez will likely
42 be supported in a future version of DB::GenPept.
44 Currently the only return format supported by NCBI Entrez for the GenPept
45 database is GenPept format, so any format specification passed to
46 GenPept will be ignored and the format will still be forced to GenPept format (which is
53 User feedback is an integral part of the
54 evolution of this and other Bioperl modules. Send
55 your comments and suggestions preferably to one
56 of the Bioperl mailing lists. Your participation
59 bioperl-l@bioperl.org - General discussion
60 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
64 Please direct usage questions or support issues to the mailing list:
66 I<bioperl-l@bioperl.org>
68 rather than to the module maintainer directly. Many experienced and
69 responsive experts will be able to look at the problem and quickly
70 address it. Please include a thorough description of the problem
71 with code and data examples if at all possible.
75 Report bugs to the Bioperl bug tracking system to help us keep track
76 of the bugs and their resolution. Bug reports can be submitted via the
79 https://github.com/bioperl/bioperl-live/issues
81 =head1 AUTHOR - Aaron Mackey, Jason Stajich
83 Email amackey@virginia.edu
84 Email jason@bioperl.org
88 The rest of the documentation details each of the object
89 methods. Internal methods are usually preceded with a _
93 # Let the code begin...
95 package Bio
::DB
::GenPept
;
97 use vars
qw($DEFAULTFORMAT $DEFAULTMODE %PARAMSTRING);
99 use base qw(Bio::DB::NCBIHelper);
# Key into %PARAMSTRING used as the fallback when a requested retrieval
# mode has no entry of its own in the parameter lookup further down.
101 $DEFAULTMODE = 'single';
# Format code handed back as this module's default sequence format; the
# constructor and the request_format override both pass it on to the parent.
102 $DEFAULTFORMAT = 'gp';
104 'batch' => { 'db' => 'protein',
106 'tool' => 'bioperl'},
108 'gi' => { 'db' => 'protein',
111 'retmode' => 'text'},
112 'version' => { 'db' => 'protein',
115 'retmode' => 'text'},
116 'single' => { 'db' => 'protein',
119 'retmode' => 'text'},
121 'query_key' => 'querykey',
122 'WebEnv' => 'cookie',
126 'retmode' => 'text'},
130 # the new way to make modules a little more lightweight
132 my($class, @args) = @_;
133 my $self = $class->SUPER::new
(@args);
134 $self->request_format($self->default_format);
141 Usage : my %params = $self->get_params($mode)
142 Function: Returns key,value pairs to be passed to NCBI database
143 for either 'batch' or 'single' sequence retrieval method
144 Returns : a key,value pair hash
145 Args : 'single' or 'batch' mode for retrieval
150 my ($self, $mode) = @_;
151 return defined $PARAMSTRING{$mode} ?
%{$PARAMSTRING{$mode}} : %{$PARAMSTRING{$DEFAULTMODE}};
154 =head2 default_format
156 Title : default_format
157 Usage : my $format = $self->default_format
158 Function: Returns default sequence format for this module
165 return $DEFAULTFORMAT;
168 # from Bio::DB::WebDBSeqI from Bio::DB::RandomAccessI
170 =head1 Routines from Bio::DB::WebDBSeqI and Bio::DB::RandomAccessI
174 Title : get_Seq_by_id
175 Usage : $seq = $db->get_Seq_by_id('P09651.5')
176 Function: Gets a Bio::Seq object by its name
177 Returns : a Bio::Seq object
178 Args : the id (as a string) of a sequence
179 Throws : "id does not exist" exception
181 =head2 get_Seq_by_acc
183 Title : get_Seq_by_acc
184 Usage : $seq = $db->get_Seq_by_acc('AAC73346');
185 Function: Gets a Seq object by accession number
186 Returns : Bio::Seq object
187 Args : accession number to retrieve by
189 =head1 Routines implemented by Bio::DB::NCBIHelper
194 Usage : my $url = $self->get_request
195 Function: HTTP::Request
197 Args : %qualifiers = a hash of qualifiers (ids, format, etc)
199 =head2 get_Stream_by_id
201 Title : get_Stream_by_id
202 Usage : $stream = $db->get_Stream_by_id( [$uid1, $uid2] );
203 Function: Gets a series of Seq objects by unique identifiers
204 Returns : a Bio::SeqIO stream object
205 Args : $ref : a reference to an array of unique identifiers for
206 the desired sequence entries
208 =head2 get_Stream_by_acc (2)
210 Title : get_Stream_by_acc
211 Usage : $stream = $db->get_Stream_by_acc( [$acc1, $acc2] );
212 Function: Gets a series of Seq objects by accession numbers
213 Returns : a Bio::SeqIO stream object
214 Args : $ref : a reference to an array of accession numbers for
215 the desired sequence entries
216 Note : For GenBank, this just calls the same code for get_Stream_by_id()
218 =head2 get_Stream_by_query
220 Title : get_Stream_by_query
221 Usage : $stream = $db->get_Stream_by_query('ROA1_HUMAN');
222 Function: Gets a series of Seq objects by unique display_id
223 Returns : a Bio::SeqIO stream object
224 Args : $ref : display_id to query by
226 =head2 request_format
228 Title : request_format
229 Usage : my $format = $self->request_format;
230 $self->request_format($format);
231 Function: Get/Set sequence format retrieval
232 Returns : string representing format
233 Args : $format = sequence format
237 # override to force format to be GenPept regardless
240 return $self->SUPER::request_format
($self->default_format());