3 # BioPerl module for Bio::SeqIO::gcg
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Ewan Birney <birney@ebi.ac.uk>
8 # and Lincoln Stein <lstein@cshl.org>
10 # Copyright Ewan Birney & Lincoln Stein
12 # You may distribute this module under the same terms as perl itself
15 # October 18, 1999 Largely rewritten by Lincoln Stein
17 # POD documentation - main docs before the code
21 Bio::SeqIO::gcg - GCG sequence input/output stream
25 Do not use this module directly. Use it via the Bio::SeqIO class.
29 This object can transform Bio::Seq objects to and from GCG flat
36 User feedback is an integral part of the evolution of this and other
37 Bioperl modules. Send your comments and suggestions preferably to one
38 of the Bioperl mailing lists. Your participation is much appreciated.
40 bioperl-l@bioperl.org - General discussion
41 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
45 Please direct usage questions or support issues to the mailing list:
47 L<bioperl-l@bioperl.org>
49 rather than to the module maintainer directly. Many experienced and
50 reponsive experts will be able look at the problem and quickly
51 address it. Please include a thorough description of the problem
52 with code and data examples if at all possible.
56 Report bugs to the Bioperl bug tracking system to help us keep track
57 the bugs and their resolution. Bug reports can be submitted via the web:
59 http://bugzilla.open-bio.org/
61 =head1 AUTHORS - Ewan Birney & Lincoln Stein
63 Email: E<lt>birney@ebi.ac.ukE<gt>
64 E<lt>lstein@cshl.orgE<gt>
68 Jason Stajich, jason@bioperl.org
72 The rest of the documentation details each of the object
73 methods. Internal methods are usually preceded with a _
77 # Let the code begin...
79 package Bio
::SeqIO
::gcg
;
82 use Bio
::Seq
::SeqFactory
;
84 use base
qw(Bio::SeqIO);
88 $self->SUPER::_initialize
(@args);
89 if( ! defined $self->sequence_factory ) {
90 $self->sequence_factory(Bio
::Seq
::SeqFactory
->new
91 (-verbose
=> $self->verbose(),
92 -type
=> 'Bio::Seq::RichSeq'));
99 Usage : $seq = $stream->next_seq()
100 Function: returns the next sequence in the stream
101 Returns : Bio::Seq object
107 my ($self,@args) = @_;
108 my($id,$type,$desc,$line,$chksum,$sequence,$date,$len);
110 while( defined($_ = $self->_readline()) ) {
112 ## Get the descriptive info (anything before the line with '..')
113 unless( /\.\.$/ ) { $desc.= $_; }
114 ## Pull ID, Checksum & Type from the line containing '..'
115 /\.\.$/ && do { $line = $_; chomp;
116 if(/Check\:\s(\d+)\s/) { $chksum = $1; }
117 if(/Type:\s(\w)\s/) { $type = $1; }
120 if(/Length:\s+(\d+)\s+(\S.+\S)\s+Type/ )
121 { $len = $1; $date = $2;}
125 return if ( !defined $_);
126 chomp($desc); # remove last "\n"
128 while( defined($_ = $self->_readline()) ) {
130 ## This is where we grab the sequence info.
133 $self->throw("Looks like start of another sequence. See documentation. ");
136 next if($_ eq "\n"); ## skip whitespace lines in formatted seq
137 s/[\d\s\t]//g; ## remove anything that is not alphabet char: preserve anything that is not explicitly specified for removal (Stefan Kirov)
138 # $_ = uc($_); ## uppercase sequence: NO. Keep the case. HL
141 ##If we parsed out a checksum, we might as well test it
143 if(defined $chksum) {
144 unless(_validate_checksum
(uc($sequence),$chksum)) {
145 $self->throw("Checksum failure on parsed sequence.");
149 ## Remove whitespace from identifier because the constructor
150 ## will throw a warning otherwise...
151 if(defined $id) { $id =~ s/\s+//g;}
153 ## Turn our parsed "Type: N" or "Type: P" (if found) into the appropriate
154 ## keyword that the constructor expects...
156 if($type eq "N") { $type = "dna"; }
157 if($type eq "P") { $type = "prot"; }
160 return $self->sequence_factory->create(-seq
=> $sequence,
171 Usage : $stream->write_seq(@seq)
172 Function: writes the formatted $seq object into the stream
173 Returns : 1 for success and 0 for error
174 Args : array of Bio::PrimarySeqI object
180 my ($self,@seq) = @_;
182 $self->throw("Did not provide a valid Bio::PrimarySeqI object")
183 unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
185 $self->warn("No whitespace allowed in GCG ID [". $seq->display_id. "]")
186 if $seq->display_id =~ /\s/;
189 my $comment = $seq->desc || '';
191 my $type = ( $seq->alphabet() =~ /[dr]na/i ) ?
'N' : 'P';
194 if( $seq->can('get_dates') ) {
195 ($timestamp) = $seq->get_dates;
197 $timestamp = localtime(time);
199 my($sum,$offset,$len,$i,$j,$cnt,@out);
202 ## Set the offset if we have any non-standard numbering going on
205 $sum = $self->GCG_checksum($seq);
207 #Output the sequence header info
208 push(@out,"$comment\n");
209 push(@out,"$id Length: $len $timestamp Type: $type Check: $sum ..\n\n");
213 for($j = 0 ; $j < $len ; ) {
215 $out[$i] = sprintf("%8d ",($j+$offset)); #numbering
217 $out[$i] .= sprintf("%s",substr($str,$j,10));
219 if( $j < $len && $j % 50 != 0 ) {
221 }elsif($j % 50 == 0 ) {
222 $out[$i++] .= "\n\n";
230 return unless $self->_print(@out);
233 $self->flush if $self->_flush_on_write && defined $self->_fh;
240 Usage : $cksum = $gcgio->GCG_checksum($seq);
241 Function : returns a gcg checksum for the sequence specified
243 This method can also be called as a class method.
245 Returns : a GCG checksum string
246 Argument : a Bio::PrimarySeqI implementing object
251 my ($self,$seqobj) = @_;
256 my $seq = $seqobj->seq();
259 foreach $char ( split(/[\.\-]*/, $seq)) {
261 $checksum += ($index * (unpack("c",$char) || 0) );
267 return ($checksum % 10000);
270 =head2 _validate_checksum
272 Title : _validate_checksum
273 Usage : n/a - internal method
274 Function: if parsed gcg sequence contains a checksum field
275 : we compare it to a value computed here on the parsed
276 : sequence. A checksum mismatch would indicate some
277 : type of parsing failure occured.
279 Returns : 1 for success, 0 for failure
280 Args : string containing parsed seq, value of parsed cheksum
285 sub _validate_checksum
{
286 my($seq,$parsed_sum) = @_;
287 my($i,$len,$computed_sum,$cnt);
291 #Generate the GCG Checksum value
293 for($i=0; $i<$len ;$i++) {
295 $computed_sum += $cnt * ord(substr($seq,$i,1));
296 ($cnt == 57) && ($cnt=0);
298 $computed_sum %= 10000;
300 ## Compare and decide if success or failure
302 if($parsed_sum == $computed_sum) {