lib/Bio/SeqIO/ace.pm

   1 #
   2 # BioPerl module for Bio::SeqIO::ace
   3 #
   4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
   5 #
   6 # Cared for by James Gilbert <jgrg@sanger.ac.uk>
   7 #
   8 # You may distribute this module under the same terms as perl itself
   9
  10 # POD documentation - main docs before the code
  11
  12 =head1 NAME
  13
  14 Bio::SeqIO::ace - ace sequence input/output stream
  15
  16 =head1 SYNOPSIS
  17
  18 Do not use this module directly.  Use it via the Bio::SeqIO class.
  19
  20 =head1 DESCRIPTION
  21
  22 This object can transform Bio::Seq objects to and
  23 from ace file format.  It only parses a DNA or
  24 Peptide objects contained in the ace file,
  25 producing PrimarySeq objects from them.  All
  26 other objects in the files will be ignored.  It
  27 doesn't attempt to parse any annotation attached
  28 to the containing Sequence or Protein objects,
  29 which would probably be impossible, since
  30 everyone's ACeDB schema can be different.
  31
  32 It won't parse ace files containing Timestamps
  33 correctly either.  This can easily be added if
  34 considered necessary.
  35
  36 =head1 FEEDBACK
  37
  38 =head2 Mailing Lists
  39
  40 User feedback is an integral part of the evolution of this and other
  41 Bioperl modules. Send your comments and suggestions preferably to one
  42 of the Bioperl mailing lists.  Your participation is much appreciated.
  43
  44   bioperl-l@bioperl.org                  - General discussion
  45   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
  46
  47 =head2 Support
  48
  49 Please direct usage questions or support issues to the mailing list:
  50
  51 I<bioperl-l@bioperl.org>
  52
  53 rather than to the module maintainer directly. Many experienced and
  54 reponsive experts will be able look at the problem and quickly
  55 address it. Please include a thorough description of the problem
  56 with code and data examples if at all possible.
  57
  58 =head2 Reporting Bugs
  59
  60 Report bugs to the Bioperl bug tracking system to help us keep track
  61 the bugs and their resolution.
  62 Bug reports can be submitted via the web:
  63
  64   https://github.com/bioperl/bioperl-live/issues
  65
  66 =head1 AUTHORS - James Gilbert
  67
  68 Email: jgrg@sanger.ac.uk
  69
  70 =head1 APPENDIX
  71
  72 The rest of the documentation details each of the object
  73 methods. Internal methods are usually preceded with a _
  74
  75 =cut
  76
  77 #'
  78 # Let the code begin...
  79
  80 package Bio::SeqIO::ace;
  81 use strict;
  82
  83 use Bio::Seq;
  84 use Bio::Seq::SeqFactory;
  85
  86 use base qw(Bio::SeqIO);
  87
  88 sub _initialize {
  89   my($self,@args) = @_;
  90   $self->SUPER::_initialize(@args);
  91   if( ! defined $self->sequence_factory ) {
  92       $self->sequence_factory(Bio::Seq::SeqFactory->new(-verbose => $self->verbose(), -type => 'Bio::PrimarySeq'));
  93   }
  94 }
  95
  96 =head2 next_seq
  97
  98  Title   : next_seq
  99  Usage   : $seq = $stream->next_seq()
 100  Function: returns the next sequence in the stream
 101  Returns : Bio::Seq object
 102  Args    : NONE
 103
 104 =cut
 105
 106 {
 107     my %bio_mol_type = (
 108         'dna'       => 'dna',
 109         'peptide'   => 'protein',
 110     );
 111
 112     sub next_seq {
 113         my( $self ) = @_;
 114         local $/ = "";  # Split input on blank lines
 115
 116         my $fh = $self->_filehandle;
 117         my( $type, $id );
 118         while (<$fh>) {
 119             if (($type, $id) = /^(DNA|Peptide)[\s:]+(.+?)\s*\n/si) {
 120                 s/^.+$//m;  # Remove first line
 121                 s/\s+//g;   # Remove whitespace
 122                 last;
 123             }
 124         }
 125         # Return if there weren't any DNA or peptide objects
 126         return unless $type;
 127
 128         # Choose the molecule type
 129         my $mol_type = $bio_mol_type{lc $type}
 130             or $self->throw("Can't get Bio::Seq molecule type for '$type'");
 131
 132         # Remove quotes from $id
 133         $id =~ s/^"|"$//g;
 134
 135         # Un-escape forward slashes, double quotes, percent signs,
 136         # semi-colons, tabs, and backslashes (if you're mad enough
 137         # to have any of these as part of object names in your acedb
 138         # database).
 139         $id =~ s/\\([\/"%;\t\\])/$1/g;
 140 #"
 141         # Called as next_seq(), so give back a Bio::Seq
 142         return $self->sequence_factory->create(
 143                                                -seq        => $_,
 144                                                -primary_id => $id,
 145                                                -display_id => $id,
 146                                                -alphabet    => $mol_type,
 147                                                );
 148     }
 149 }
 150
 151 =head2 write_seq
 152
 153  Title   : write_seq
 154  Usage   : $stream->write_seq(@seq)
 155  Function: writes the $seq object into the stream
 156  Returns : 1 for success and 0 for error
 157  Args    : Bio::Seq object(s)
 158
 159
 160 =cut
 161
 162 sub write_seq {
 163     my ($self, @seq) = @_;
 164
 165     foreach my $seq (@seq) {
 166         $self->throw("Did not provide a valid Bio::PrimarySeqI object")
 167             unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
 168         my $mol_type = $seq->alphabet;
 169         my $id = $seq->display_id;
 170
 171         # Escape special charachers in id
 172         $id =~ s/([\/"%;\t\\])/\\$1/g;
 173 #"
 174         # Print header for DNA or Protein object
 175         if ($mol_type eq 'dna') {
 176             $self->_print(
 177                 qq{\nSequence : "$id"\nDNA "$id"\n},
 178                 qq{\nDNA : "$id"\n},
 179             );
 180         }
 181         elsif ($mol_type eq 'protein') {
 182             $self->_print(
 183                 qq{\nProtein : "$id"\nPeptide "$id"\n},
 184                 qq{\nPeptide : "$id"\n},
 185             );
 186         }
 187         else {
 188             $self->throw("Don't know how to produce ACeDB output for '$mol_type'");
 189         }
 190
 191         # Print the sequence
 192         my $str = $seq->seq;
 193         my( $formatted_seq );
 194         while ($str =~ /(.{1,60})/g) {
 195             $formatted_seq .= "$1\n";
 196         }
 197         $self->_print($formatted_seq, "\n");
 198     }
 199
 200     $self->flush if $self->_flush_on_write && defined $self->_fh;
 201     return 1;
 202 }
 203
 204 1;