3 # BioPerl module for Bio::SeqIO::embldriver
5 # Cared for by Ewan Birney <birney@ebi.ac.uk>
7 # Copyright Ewan Birney
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::SeqIO::embldriver - EMBL sequence input/output stream
19 It is probably best not to use this object directly, but
20 rather go through the SeqIO handler system. Go:
22 $stream = Bio::SeqIO->new(-file => $filename, -format => 'embldriver');
24 while ( (my $seq = $stream->next_seq()) ) {
25 # do something with $seq
30 This object can transform Bio::Seq objects to and from EMBL flat
33 There is a lot of flexibility here about how to dump things which
34 should be documented more fully.
36 There should be a common object that this and Genbank share (probably
37 with Swissprot). Too much of the magic is identical.
39 =head2 Optional functions
45 (output only) shows the dna or not
49 (output only) provides a sorting func which is applied to the FTHelpers
52 =item _id_generation_func()
54 This is a function which is called as
56 print "ID ", $func($annseq), "\n";
58 To generate the ID line. If it is not there, it generates a sensible ID
59 line using a number of tools.
61 If you want to output annotations in EMBL format they need to be
62 stored in a Bio::Annotation::Collection object which is accessible
63 through the Bio::SeqI interface method L<annotation()|annotation>.
65 The following are the names of the keys which are polled from a
66 L<Bio::Annotation::Collection> object.
68 reference - Should contain Bio::Annotation::Reference objects
69 comment - Should contain Bio::Annotation::Comment objects
70 dblink - Should contain Bio::Annotation::DBLink objects
78 User feedback is an integral part of the evolution of this and other
79 Bioperl modules. Send your comments and suggestions preferably to one
80 of the Bioperl mailing lists. Your participation is much appreciated.
82 bioperl-l@bioperl.org - General discussion
83 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
87 Report bugs to the Bioperl bug tracking system to help us keep track of
88 the bugs and their resolution. Bug reports can be submitted via
91 http://bugzilla.open-bio.org/
93 =head1 AUTHOR - Ewan Birney
95 Email birney@ebi.ac.uk
99 The rest of the documentation details each of the object
100 methods. Internal methods are usually preceded with a _
104 # Let the code begin...
106 package Bio
::SeqIO
::embldriver
;
107 use vars
qw(%FTQUAL_NO_QUOTE);
109 use Bio::SeqIO::Handler::GenericRichSeqHandler;
112 use base qw(Bio::SeqIO);
114 my %FTQUAL_NO_QUOTE = map {$_ => 1} qw(
117 cons_splice direction
121 transl_except transl_table
133 OC
=> 'CLASSIFICATION',
146 AH
=> 'TPA_HEADER', # Third party annotation
147 AS
=> 'TPA_DATA', # Third party annotation
159 OC
=> 'CLASSIFICATION',
160 OH
=> 'HOST', # not currently handled, bundled with organism data for now
170 AS
=> 'ASSEMBLYINFO', # Third party annotation
179 # signals to process what's in the hash prior to next round
180 # these should be changed to map secondary data
181 my %PRIMARY = map {$_ => 1} qw(ID AC DT DE SV KW OS RN AH DR FH CC SQ FT WGS CON ANN TPA //);
184 my($self,@args) = @_;
186 $self->SUPER::_initialize
(@args);
187 my $handler = $self->_rearrange([qw(HANDLER)],@args);
188 # hash for functions for decoding keys.
189 $handler ?
$self->seqhandler($handler) :
190 $self->seqhandler(Bio
::SeqIO
::Handler
::GenericRichSeqHandler
->new(
192 -verbose
=> $self->verbose,
193 -builder
=> $self->sequence_builder
196 if( ! defined $self->sequence_factory ) {
197 $self->sequence_factory(Bio
::Seq
::SeqFactory
->new
198 (-verbose
=> $self->verbose(),
199 -type
=> 'Bio::Seq::RichSeq'));
206 Usage : $seq = $stream->next_seq()
207 Function: returns the next sequence in the stream
208 Returns : Bio::Seq object
215 my $hobj = $self->seqhandler;
217 my ($featkey, $qual, $annkey, $delim, $seqdata);
221 while(defined(my $line = $self->_readline)) {
222 next PARSER
if $line =~ m{^\s*$};
224 my ($ann,$data) = split m{\s{2,3}}, $line , 2;
225 next PARSER
if ($ann eq 'XX' || $ann eq 'FH');
230 if ($data =~ m{^(\S+)\s+([^\n]+)}) {
231 $hobj->data_handler($seqdata) if $seqdata;
233 ($seqdata->{FEATURE_KEY
}, $data) = ($1, $2);
234 $seqdata->{NAME
} = $ann;
236 } elsif ($data =~ m{^\s+/([^=]+)=?(.+)?}) {
237 ($qual, $data) = ($1, $2 ||'');
238 $ct = (exists $seqdata->{$qual}) ?
239 ((ref($seqdata->{$qual})) ?
scalar(@
{ $seqdata->{$qual} }) : 1)
243 $data =~ tr{"}{}d; # we don't care about quotes yet...
244 my $delim = ($FTQUAL_NO_QUOTE{$qual}) ?
'' : ' ';
246 $seqdata->{$qual} .= ($seqdata->{$qual}) ?
250 if (!ref($seqdata->{$qual})) {
251 $seqdata->{$qual} = [$seqdata->{$qual}];
253 (exists $seqdata->{$qual}->[$ct]) ?
254 (($seqdata->{$qual}->[$ct]) .= $delim.$data) :
255 (($seqdata->{$qual}->[$ct]) .= $data);
260 last PARSER
if $ann eq '//';
261 if ($ann ne $lastann) {
262 if (!$SEC{$ann} && $seqdata) {
263 $hobj->data_handler($seqdata);
264 # can't use undef here; it can lead to subtle mem leaks
267 $annkey = (!$SEC{$ann}) ?
'DATA' : # primary data
269 $seqdata->{'NAME'} = $ann if !$SEC{$ann};
272 # toss the data for SQ lines; this needs to be done after the
273 # call to the data handler
275 next PARSER
if $ann eq 'SQ';
276 my $delim = $DELIM{$ann} || ' ';
277 $seqdata->{$annkey} .= ($seqdata->{$annkey}) ?
278 $delim.$data : $data;
282 # this should only be sequence (fingers crossed!)
284 while (defined ($line = $self->_readline)) {
285 if (index($line, '//') == 0) {
286 $data =~ tr{0-9 \n}{}d;
287 $seqdata->{DATA
} = $data;
288 #$self->debug(Dumper($seqdata));
289 $hobj->data_handler($seqdata);
299 $hobj->data_handler($seqdata) if $seqdata;
301 return $hobj->build_sequence;
308 while(defined(my $line = $self->_readline)) {
309 next if $line =~ m{^\s*$};
311 my ($ann,$data) = split m{\s{2,3}}, $line , 2;
313 $self->debug("Ann: [$ann]\n\tData: [$data]\n");
314 last PARSER
if $ann =~ m{//};
321 Usage : $stream->write_seq($seq)
322 Function: writes the $seq object (must be seq) to the stream
323 Returns : nothing; this driver is input-only and always throws (use Bio::SeqIO::embl for output)
324 Args : array of 1 to n Bio::SeqI objects
329 shift->throw("Use Bio::SeqIO::embl for output");
330 # maybe make a Writer class as well????
336 Usage : $stream->seqhandler($handler)
337 Function: Get/Set the Bio::Seq::HandlerBaseI object
338 Returns : Bio::Seq::HandlerBaseI
339 Args : Bio::Seq::HandlerBaseI
344 my ($self, $handler) = @_;
346 $self->throw("Not a Bio::HandlerBaseI") unless
347 ref($handler) && $handler->isa("Bio::HandlerBaseI");
348 $self->{'_seqhandler'} = $handler;
350 return $self->{'_seqhandler'};