2 # BioPerl module for Bio::SeqIO::kegg
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Allen Day <allenday@ucla.edu>
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::SeqIO::kegg - KEGG sequence input/output stream
20 # It is probably best not to use this object directly, but
21 # rather go through the SeqIO handler system. Go:
25 $stream = Bio::SeqIO->new(-file => $filename, -format => 'KEGG');
27 while ( my $seq = $stream->next_seq() ) {
28 # do something with $seq
33 This class transforms KEGG gene records into Bio::Seq objects.
35 =head2 Mapping of record properties to object properties
37 This section is supposed to document which sections and properties of
38 a KEGG databank record end up where in the Bioperl object model. It
39 is far from complete and presently focuses only on those mappings
40 which may be non-obvious. $seq in the text refers to the
41 Bio::Seq::RichSeqI implementing object returned by the parser for each
56 $seq->annotation->get_Annotations('description');
60 grep {$_->database eq 'KO'} $seq->annotation->get_Annotations('dblink')
64 grep {$_->database eq 'PATH'}
65 $seq->annotation->get_Annotations('dblink')
69 FIXME, NOT IMPLEMENTED
73 for my $pathway ( $seq->annotation->get_Annotations('pathway') ) {
79 $seq->annotation->get_Annotations('dblink')
83 FIXME, NOT IMPLEMENTED
99 User feedback is an integral part of the evolution of this and other
100 Bioperl modules. Send your comments and suggestions preferably to one
101 of the Bioperl mailing lists. Your participation is much appreciated.
103 bioperl-l@bioperl.org - General discussion
104 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
108 Please direct usage questions or support issues to the mailing list:
110 I<bioperl-l@bioperl.org>
112 rather than to the module maintainer directly. Many experienced and
113 reponsive experts will be able look at the problem and quickly
114 address it. Please include a thorough description of the problem
115 with code and data examples if at all possible.
117 =head2 Reporting Bugs
119 Report bugs to the Bioperl bug tracking system to help us keep track
120 the bugs and their resolution. Bug reports can be submitted via the web:
122 https://github.com/bioperl/bioperl-live/issues
124 =head1 AUTHOR - Allen Day
126 Email allenday@ucla.edu
130 The rest of the documentation details each of the object
131 methods. Internal methods are usually preceded with a _
135 # Let the code begin...
137 package Bio
::SeqIO
::kegg
;
140 use Bio
::SeqFeature
::Generic
;
142 use Bio
::Seq
::SeqFactory
;
143 use Bio
::Annotation
::Collection
;
144 use Bio
::Annotation
::Comment
;
145 use Bio
::Annotation
::DBLink
;
147 use base
qw(Bio::SeqIO);
150 my($self,@args) = @_;
152 $self->SUPER::_initialize
(@args);
153 # hash for functions for decoding keys.
154 $self->{'_func_ftunit_hash'} = {};
155 if( ! defined $self->sequence_factory ) {
156 $self->sequence_factory(Bio
::Seq
::SeqFactory
->new
157 (-verbose
=> $self->verbose(),
158 -type
=> 'Bio::Seq::RichSeq'));
165 Usage : $seq = $stream->next_seq()
166 Function: returns the next sequence in the stream
167 Returns : Bio::Seq::RichSeq object
173 my ($self,@args) = @_;
174 my $builder = $self->sequence_builder();
179 my (@acc, @features);
180 my ($display_id, $annotation);
183 # initialize; we may come here because of starting over
188 %params = (-verbose
=> $self->verbose); # reset hash
191 $buffer = $self->_readline();
193 return if( !defined $buffer ); # end of file
194 $buffer =~ /^ENTRY/ ||
195 $self->throw("KEGG stream with bad ENTRY line
. Not KEGG
in my book
. Got
$buffer'");
198 my @chunks = split /\n(?=\S)/, $buffer;
200 foreach my $chunk (@chunks){
201 my($key) = $chunk =~ /^(\S+)/;
202 $FIELDS{$key} = $chunk;
205 # changing to split method to get entry_ids that include
206 # sequence version like Whatever.1
207 my(undef,$entry_id,$entry_seqtype,$entry_species) =
208 split(' ',$FIELDS{ENTRY});
212 ($name) = $FIELDS{NAME} =~ /^NAME\s+(.+)$/;
215 my( $definition, $aa_length, $aa_seq, $nt_length, $nt_seq );
217 if(( exists $FIELDS{DEFINITION} ) and ( $FIELDS{DEFINITION} =~ /^DEFINITION/ )) {
218 ($definition) = $FIELDS{DEFINITION} =~ /^DEFINITION\s+(.+)$/s;
219 $definition =~ s/\s+/ /gs;
221 if(( exists $FIELDS{AASEQ} ) and ( $FIELDS{AASEQ} =~ /^AASEQ/ )) {
222 ($aa_length,$aa_seq) = $FIELDS{AASEQ} =~ /^AASEQ\s+(\d+)\n(.+)$/s;
225 if(( exists $FIELDS{NTSEQ} ) and ( $FIELDS{NTSEQ} =~ /^NTSEQ/ )) {
226 ($nt_length,$nt_seq) = $FIELDS{NTSEQ} =~ /^NTSEQ\s+(\d+)\n(.+)$/s;
230 $annotation = Bio::Annotation::Collection->new();
232 $annotation->add_Annotation('description
',
233 Bio::Annotation::Comment->new(-text => $definition));
235 $annotation->add_Annotation('aa_seq
',
236 Bio::Annotation::Comment->new(-text => $aa_seq));
238 my($ortholog_db,$ortholog_id,$ortholog_desc);
239 if ($FIELDS{ORTHOLOG}) {
240 ($ortholog_db,$ortholog_id,$ortholog_desc) = $FIELDS{ORTHOLOG}
241 =~ /^ORTHOLOG\s+(\S+):\s+(\S+)\s+(.*?)$/;
243 $annotation->add_Annotation('dblink
',Bio::Annotation::DBLink->new(
244 -database => $ortholog_db,
245 -primary_id => $ortholog_id,
246 -comment => $ortholog_desc) );
250 $FIELDS{MOTIF} =~ s/^MOTIF\s+//;
251 while($FIELDS{MOTIF} =~/\s*?(\S+):\s+(.+?)$/mg){
254 foreach my $id (split(/\s+/, $ids)){
256 $annotation->add_Annotation('dblink
',Bio::Annotation::DBLink->new(
264 if($FIELDS{PATHWAY}) {
265 $FIELDS{PATHWAY} =~ s/^PATHWAY\s+//;
266 while($FIELDS{PATHWAY} =~ /\s*PATH:\s+(.+)$/mg){
267 $annotation->add_Annotation('pathway
',
268 Bio::Annotation::Comment->new(-text => "$1"));
272 if($FIELDS{POSITION}) {
273 $FIELDS{POSITION} =~ s/^POSITION\s+//;
274 $annotation->add_Annotation('position
',
275 Bio::Annotation::Comment->new(-text => $FIELDS{POSITION}));
278 if ($FIELDS{CLASS}) {
279 $FIELDS{CLASS} =~ s/^CLASS\s+//;
280 $FIELDS{'CLASS
'} =~ s/\n//g;
281 while($FIELDS{CLASS} =~ /(.*?)\[(\S+):(\S+)\]/g){
282 my ($pathway,$db,$id) = ($1,$2,$3);
283 $pathway =~ s/\s+/ /g;
284 $pathway =~ s/\s$//g;
285 $pathway =~ s/^\s+//;
286 $annotation->add_Annotation('pathway
',
287 Bio::Annotation::Comment->new(-text => $pathway));
289 $annotation->add_Annotation('dblink
',Bio::Annotation::DBLink->new(
290 -database => $db, -primary_id => $id));
294 if($FIELDS{DBLINKS}) {
295 $FIELDS{DBLINKS} =~ s/^DBLINKS/ /;
296 while($FIELDS{DBLINKS} =~ /\s+(\S+):\s+(\S+)\n?/gs){ ### modified
297 $annotation->add_Annotation('dblink
',Bio::Annotation::DBLink->new(
298 -database => $1, -primary_id => $2)) if $1;
302 $params{'-alphabet
'} = 'dna
';
303 $params{'-seq
'} = $nt_seq;
304 $params{'-display_id
'} = $name;
305 $params{'-accession_number
'} = $entry_id;
306 $params{'-species
'} = Bio::Species->new(
307 -common_name => $entry_species);
308 $params{'-annotation
'} = $annotation;
310 $builder->add_slot_value(%params);
311 $seq = $builder->make_object();
319 Note : write_seq() is not implemented for KEGG format output.
324 shift->throw("write_seq() not implemented for KEGG format output.");