3 # BioPerl module for Bio::AlignIO::po
5 # based on the Bio::AlignIO::fasta module
6 # by Peter Schattner (and others?)
8 # and the SimpleAlign.pm module of Ewan Birney
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::AlignIO::po - po MSA Sequence input/output stream
20 Do not use this module directly. Use it via the L<Bio::AlignIO> class.
24 This object can transform L<Bio::SimpleAlign> objects to and from
25 'po' format flat file databases. 'po' format is the native format of
26 the POA alignment program (Lee C, Grasso C, Sharlow MF, 'Multiple
27 sequence alignment using partial order graphs', Bioinformatics (2002),
34 Please direct usage questions or support issues to the mailing list:
36 I<bioperl-l@bioperl.org>
38 rather than to the module maintainer directly. Many experienced and
39 reponsive experts will be able look at the problem and quickly
40 address it. Please include a thorough description of the problem
41 with code and data examples if at all possible.
45 Report bugs to the Bioperl bug tracking system to help us keep track
46 the bugs and their resolution. Bug reports can be submitted via the
49 https://github.com/bioperl/bioperl-live/issues
51 =head1 AUTHORS - Matthew Betts
53 Email: matthew.betts@ii.uib.no
57 The rest of the documentation details each of the object
58 methods. Internal methods are usually preceded with a _
62 # Let the code begin...
64 package Bio
::AlignIO
::po
;
69 use base
qw(Bio::AlignIO);
75 Usage : $aln = $stream->next_aln()
76 Function: returns the next alignment in the stream.
77 Returns : L<Bio::Align::AlignI> object - returns undef on end of file
98 $aln = Bio
::SimpleAlign
->new();
100 # get to the first 'VERSION' line
101 while(defined($entry = $self->_readline)) {
102 if($entry =~ /^VERSION=(\S+)/) {
105 if(defined($entry = $self->_readline) and $entry =~ /^NAME=(\S+)/) {
113 # read in the sequence names and node data, up to the end of
114 # the file or the next 'VERSION' line, whichever comes first
117 while(defined($entry = $self->_readline)) {
118 if($entry =~ /^VERSION/) {
119 # start of a new alignment, so...
120 $self->_pushback($entry);
123 elsif($entry =~ /^SOURCENAME=(\S+)/) {
126 if($name =~ /(\S+)\/(\d
+)-(\d
+)/) {
127 $seq = Bio
::LocatableSeq
->new(
131 '-alphabet' => $self->alphabet,
135 $seq = Bio
::LocatableSeq
->new('-display_id'=> $name,
136 '-alphabet' => $self->alphabet);
139 # store sequences in a list initially, because can't guarantee
140 # that will get them back from SimpleAlign object in the order
141 # they were read, and also can't add them to the SimpleAlign
142 # object here because their sequences are currently unknown
148 elsif($entry =~ /^SOURCEINFO=(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(.*)/) {
151 elsif($entry =~ /^(\S):(\S+)/) {
157 'status' => 'unvisited',
161 if($list =~ /^([L\d]*)([S\d]*)([A\d]*)/) {
162 push(@
{$node->{'L'}}, split(/L/, $1));
163 push(@
{$node->{'S'}}, split(/S/, $2));
164 push(@
{$node->{'A'}}, split(/A/, $3));
166 (@
{$node->{'L'}} > 0) and shift @
{$node->{'L'}};
167 (@
{$node->{'S'}} > 0) and shift @
{$node->{'S'}};
168 (@
{$node->{'A'}} > 0) and shift @
{$node->{'A'}};
171 push @
{$nodes}, $node;
176 foreach $node (@
{$nodes}) {
177 ($node->{'status'} ne 'unvisited') and next;
179 @chars = ($aln->gap_char) x @
{$seqs}; # char for each seq defaults to a gap
181 # set the character for each sequence represented by this node
182 foreach $s (@
{$node->{'S'}}) {
183 $chars[$s] = $node->{'aa'};
185 $node->{'status'} = 'visited';
187 # do the same for each node in the same align ring
188 while(defined($a = $node->{'A'}->[0])) {
189 $node = $nodes->[$a];
190 ($node->{'status'} ne 'unvisited') and last;
192 foreach $s (@
{$node->{'S'}}) {
193 $chars[$s] = $node->{'aa'};
196 $node->{'status'} = 'visited';
199 # update the sequences
200 foreach $seq (@
{$seqs}) {
201 $seq->{'str'} .= shift @chars;
205 # set the sequences of the bioperl objects
206 # and add them to the alignment
207 foreach $seq (@
{$seqs}) {
208 $seq->{'seq'}->seq($seq->{'str'});
209 $aln->add_seq($seq->{'seq'});
212 # has an alignment been read?...
214 return $aln if $aln->num_sequences;
221 Usage : $stream->write_aln(@aln)
222 Function: writes the $aln object into the stream in po format
223 Returns : 1 for success and 0 for error
224 Args : L<Bio::Align::AlignI> object
242 foreach $aln (@alns) {
243 if(!$aln or !$aln->isa('Bio::Align::AlignI')) {
244 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
248 # store the seqs on a list, because po format
249 # refers to them by position on this list
251 foreach $seq ($aln->each_seq()) {
260 # go through each column in the alignment and construct
261 # the nodes for the equivalent poa alignment ring
263 for($col = 0; $col < $aln->length; $col++) {
266 'first' => scalar @
{$nodes},
267 'last' => scalar @
{$nodes},
270 for($i = 0; $i < @
{$seqs}; $i++) {
273 $char = $seq->{'seq'}->subseq($col + 1, $col + 1);
275 ($char eq $aln->gap_char) and next;
277 if(!defined($node = $ring->{'nodes'}->{$char})) {
279 'n' => scalar @
{$nodes},
287 $ring->{'nodes'}->{$char} = $node;
288 $ring->{'last'} = $node->{'n'};
290 # add the node to the node list
291 push @
{$nodes}, $node;
294 # add the sequence to the node
295 push @
{$node->{'S'}}, $i;
297 # add the node to the sequence
298 defined($seq->{'first'}) or ($seq->{'first'} = $node);
301 # add an edge from the previous node in the sequence to this one.
302 # Then set the previous node to the current one, ready for the next
303 # residue in this sequence
304 defined($seq->{'previous'}) and ($node->{'L'}->{$seq->{'previous'}->{'n'}} = $seq->{'previous'});
305 $seq->{'previous'} = $node;
308 # set the 'next node in ring' field for each node in the ring
309 if($ring->{'first'} < $ring->{'last'}) {
310 for($i = $ring->{'first'}; $i < $ring->{'last'}; $i++) {
311 push @
{$nodes->[$i]->{'A'}}, $i + 1;
313 push @
{$nodes->[$ring->{'last'}]->{'A'}}, $ring->{'first'};
317 # print header information
319 'VERSION=', ($aln->source and ($aln->source !~ /\A\s*\Z/)) ?
$aln->source : 'bioperl', "\n",
320 'NAME=', $aln->id, "\n",
321 'TITLE=', ($seqs->[0]->{'seq'}->description or $aln->id), "\n",
322 'LENGTH=', scalar @
{$nodes}, "\n",
323 'SOURCECOUNT=', scalar @
{$seqs}, "\n",
326 # print sequence information
327 foreach $seq (@
{$seqs}) {
329 'SOURCENAME=', $seq->{'seq'}->display_id, "\n",
331 $seq->{'n_nodes'}, ' ', # number of nodes in the sequence
332 $seq->{'first'}->{'n'}, ' ', # index of first node containing the sequence
333 0, ' ', # FIXME - sequence weight?
334 -1, ' ', # FIXME - index of bundle containing sequence?
335 ($seq->{'seq'}->description or 'untitled'), "\n",
339 # print node information
340 foreach $node (@
{$nodes}) {
341 $self->_print($node->{'aa'}, ':');
342 (keys %{$node->{'L'}} > 0) and $self->_print('L', join('L', sort {$a <=> $b} keys %{$node->{'L'}}));
343 (@
{$node->{'S'}} > 0) and $self->_print('S', join('S', @
{$node->{'S'}}));
344 (@
{$node->{'A'}} > 0) and $self->_print('A', join('A', @
{$node->{'A'}}));
348 $self->flush if $self->_flush_on_write && defined $self->_fh;