[bug 2714]
[bioperl-live.git] / Bio / SeqIO / swissdriver.pm
blobc45c7fe743cf248580de47b47e0d08726746ab3b
1 # make as generic as possible (along with gbhandler, emblhandler)
3 # Let the code begin...
5 package Bio::SeqIO::swissdriver;
6 use vars qw(%FTQUAL_NO_QUOTE);
7 use strict;
8 use Bio::SeqIO::Handler::GenericRichSeqHandler;
9 use Data::Dumper;
11 use base qw(Bio::SeqIO);
13 # signals to process what's in the hash prior to next round, maps ann => names
14 my %SEC = (
15 OC => 'CLASSIFICATION',
16 OH => 'HOST', # not currently handled, bundled with organism data for now
17 OG => 'ORGANELLE',
18 OX => 'CROSSREF',
19 RA => 'AUTHORS',
20 RC => 'COMMENT',
21 RG => 'CONSRTM',
22 RP => 'POSITION',
23 RX => 'CROSSREF',
24 RT => 'TITLE',
25 RL => 'JOURNAL',
26 AS => 'ASSEMBLYINFO', # Third party annotation
27 '//' => 'RECORDEND'
30 # add specialized delimiters here for easier postprocessing
31 my %DELIM = (
32 CC => "\n",
33 DR => "\n",
34 DT => "\n",
37 sub _initialize {
38 my($self,@args) = @_;
40 $self->SUPER::_initialize(@args);
41 my $handler = $self->_rearrange([qw(HANDLER)],@args);
42 # hash for functions for decoding keys.
43 $handler ? $self->seqhandler($handler) :
44 $self->seqhandler(Bio::SeqIO::Handler::GenericRichSeqHandler->new(
45 -format => 'swiss',
46 -verbose => $self->verbose,
47 -builder => $self->sequence_builder
48 ));
49 if( ! defined $self->sequence_factory ) {
50 $self->sequence_factory(Bio::Seq::SeqFactory->new
51 (-verbose => $self->verbose(),
52 -type => 'Bio::Seq::RichSeq'));
56 =head2 next_seq
58 Title : next_seq
59 Usage : $seq = $stream->next_seq()
60 Function: returns the next sequence in the stream
61 Returns : Bio::Seq object
62 Args : none
64 =cut
66 sub next_seq {
67 my $self = shift;
68 my $hobj = $self->seqhandler;
69 local($/) = "\n";
70 # these contain values that need to carry over each round
71 my ($featkey, $qual, $annkey, $seqdata, $location);
72 my $lastann = '';
73 my $ct = 0;
74 # main parser
75 PARSER:
76 while(defined(my $line = $self->_readline)) {
77 chomp $line;
78 my ($ann, $data) = split(m{\s+}, $line, 2);
79 if ($ann) {
80 if ($ann eq 'FT') {
81 # sequence features
82 if ($data =~ m{^(\w+)\s+([\d\?\<]+)\s+([\d\?\>]+)(?:\s+?(\S.*))?}ox) {
83 # has location data and desc
84 if ($seqdata) {
85 $hobj->data_handler($seqdata);
86 $seqdata = ();
88 ($seqdata->{FEATURE_KEY}, my $loc1, my $loc2, $data) = ($1, $2, $3, $4);
89 $qual = 'description';
90 $seqdata->{$qual} = $data;
91 $seqdata->{NAME} = $ann;
92 $seqdata->{LOCATION} = "$loc1..$loc2" if defined $loc1;
93 next PARSER;
94 } elsif ($data =~ m{^\s+/([^=]+)(?:=(.+))?}ox) {
95 # has qualifer
96 ($qual, $data) = ($1, $2 || '');
97 $ct = ($seqdata->{$qual}) ?
98 ((ref($seqdata->{$qual})) ? scalar(@{ $seqdata->{$qual} }) : 1)
99 : 0 ;
101 $data =~ s{\.$}{};
102 if ($ct == 0) {
103 $seqdata->{$qual} .= ($seqdata->{$qual}) ?
104 ' '.$data : $data;
105 } else {
106 if (!ref($seqdata->{$qual})) {
107 $seqdata->{$qual} = [$seqdata->{$qual}];
109 ($seqdata->{$qual}->[$ct]) ?
110 ($seqdata->{$qual}->[$ct] .= ' '.$data) :
111 ($seqdata->{$qual}->[$ct] .= $data);
113 } else {
114 # simple annotations
115 if ($ann ne $lastann) {
116 if (!$SEC{$ann} && $seqdata) {
117 $hobj->data_handler($seqdata);
118 # can't use undef here; it can lead to subtle mem leaks
119 $seqdata = ();
121 $annkey = (!$SEC{$ann}) ? 'DATA' : # primary data
122 $SEC{$ann};
123 $seqdata->{'NAME'} = $ann if !$SEC{$ann};
125 last PARSER if $ann eq '//';
126 next PARSER if $ann eq 'SQ';
127 my $delim = $DELIM{$ann} || ' ';
128 $seqdata->{$annkey} .= ($seqdata->{$annkey}) ?
129 $delim.$data : $data;
130 $lastann = $ann;
132 } else {
133 # this should only be sequence (fingers crossed!)
134 SEQUENCE:
135 while (defined ($line = $self->_readline)) {
136 if (index($line, '//') == 0) {
137 $data =~ tr{0-9 \n}{}d;
138 $seqdata->{DATA} = $data;
139 #$self->debug(Dumper($seqdata));
140 $hobj->data_handler($seqdata);
141 $seqdata = ();
142 last PARSER;
143 } else {
144 $data .= $line;
145 $line = undef;
150 # some files have no // for the last file; this catches the last bit o' data
151 $hobj->data_handler($seqdata) if $seqdata;
152 return $hobj->build_sequence;
155 =head2 write_seq
157 Title : write_seq
158 Usage : $stream->write_seq($seq)
159 Function: writes the $seq object (must be seq) to the stream
160 Returns : 1 for success and 0 for error
161 Args : array of 1 to n Bio::SeqI objects
163 =cut
165 sub write_seq {
166 shift->throw("Use Bio::SeqIO::swiss write_seq() for output");
167 # maybe make a Writer class as well????
170 =head2 seqhandler
172 Title : seqhandler
173 Usage : $stream->seqhandler($handler)
174 Function: Get/Set teh Bio::Seq::HandlerBaseI object
175 Returns : Bio::Seq::HandlerBaseI
176 Args : Bio::Seq::HandlerBaseI
178 =cut
180 sub seqhandler {
181 my ($self, $handler) = @_;
182 if ($handler) {
183 $self->throw("Not a Bio::HandlerBaseI") unless
184 ref($handler) && $handler->isa("Bio::HandlerBaseI");
185 $self->{'_seqhandler'} = $handler;
187 return $self->{'_seqhandler'};
192 __END__