nexml.t: Added a missing XML::Twig requirement.
[bioperl-live.git] / Bio / AlignIO / proda.pm
blob756f5fc92006975762c65eb3e0550fac882d13cf
2 # BioPerl module for Bio::AlignIO::proda
4 # based on the Bio::SeqIO modules
5 # by Ewan Birney <birney@ebi.ac.uk>
6 # and Lincoln Stein <lstein@cshl.org>
7 # and the Bio::SimpleAlign module of Ewan Birney
9 # Copyright Albert Vilella
11 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::AlignIO::proda - proda sequence input/output stream
18 This provides the basic capabilities to parse the output alignments
19 from the ProDA multiple sequence alignment program
20 (http://proda.stanford.edu)
22 =head1 SYNOPSIS
24 Do not use this module directly. Use it via the Bio::AlignIO class.
26 =head1 DESCRIPTION
28 This object can transform Bio::Align::AlignI objects to and from proda
29 files.
31 =head1 FEEDBACK
33 =head2 Mailing Lists
35 User feedback is an integral part of the evolution of this and other
36 Bioperl modules. Send your comments and suggestions preferably to one
37 of the Bioperl mailing lists. Your participation is much appreciated.
39 bioperl-l@bioperl.org - General discussion
40 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
42 =head2 Support
44 Please direct usage questions or support issues to the mailing list:
46 I<bioperl-l@bioperl.org>
48 rather than to the module maintainer directly. Many experienced and
49 reponsive experts will be able look at the problem and quickly
50 address it. Please include a thorough description of the problem
51 with code and data examples if at all possible.
53 =head2 Reporting Bugs
55 Report bugs to the Bioperl bug tracking system to help us keep track
56 the bugs and their resolution. Bug reports can be submitted via the
57 web:
59 https://github.com/bioperl/bioperl-live/issues
61 =head1 AUTHORS - Albert Vilella
63 Email: avilella-at-gmail-dot-com
66 =head1 APPENDIX
68 The rest of the documentation details each of the object
69 methods. Internal methods are usually preceded with a _
71 =cut
73 # Let the code begin...
75 package Bio::AlignIO::proda;
76 use vars qw($LINELENGTH);
77 use strict;
80 $LINELENGTH = 60;
81 use base qw(Bio::AlignIO);
83 =head2 new
85 Title : new
86 Usage : $alignio = Bio::AlignIO->new(-format => 'proda',
87 -file => 'filename');
88 Function: returns a new Bio::AlignIO object to handle proda files
89 Returns : Bio::AlignIO::proda object
90 Args : -verbose => verbosity setting (-1, 0, 1, 2)
91 -file => name of file to read in or to write, with ">"
92 -fh => alternative to -file param - provide a filehandle
93 to read from or write to
94 -format => alignment format to process or produce
95 -percentages => display a percentage of identity
96 in each line of the alignment (proda only)
97 -linelength=> alignment output line length (default 60)
99 =cut
101 sub _initialize {
102 my ( $self, @args ) = @_;
103 $self->SUPER::_initialize(@args);
104 my ( $percentages, $ll ) =
105 $self->_rearrange( [qw(PERCENTAGES LINELENGTH)], @args );
106 defined $percentages && $self->percentages($percentages);
107 $self->line_length( $ll || $LINELENGTH );
110 =head2 next_aln
112 Title : next_aln
113 Usage : $aln = $stream->next_aln()
114 Function: returns the next alignment in the stream
115 Returns : Bio::Align::AlignI object
116 Args : NONE
118 See L<Bio::Align::AlignI> for details
120 =cut
122 sub next_aln {
123 my ($self) = @_;
124 my $first_line;
126 while ( $first_line = $self->_readline ) {
127 last if $first_line !~ /^$/;
129 $self->_pushback($first_line);
130 if ( defined( $first_line = $self->_readline )
131 && $first_line !~ /\(/ )
133 $self->throw(
134 "trying to parse a file which does not start with proda headers"
136 } else {
137 # use it inside the loop
138 $self->_pushback($first_line);
140 my %alignments;
141 my $aln = Bio::SimpleAlign->new(
142 -source => 'proda',
143 -verbose => $self->verbose
145 my $order = 0;
146 my %order;
147 $self->{_lastline} = '';
148 my ($first_block, $seen_block, $seen_header) = (0,0,0);
149 my @ids; my @ids_copy;
150 while ( defined( $_ = $self->_readline ) ) {
151 next if (/^\s+$/ && !$first_block);
152 if (/^\s$/) { # line contains no description
153 $seen_block = 1;
154 next;
156 $first_block = 1;
158 # break the loop if we come to the end of the current alignment
159 # and push back the proda header
160 if (/\(/ && $seen_header) {
161 $self->_pushback($_);
162 last;
165 if (/\(/ && !$seen_header) {
166 @ids = split(' ', $_);
167 $seen_header = 1;
168 next;
171 my ( $seqname, $aln_line ) = ( '', '' );
172 if (/^\s*(\S+)\s*\/\s*(\d+)-(\d+)\s+(\S+)\s*$/ox) {
174 # clustal 1.4 format
175 ( $seqname, $aln_line ) = ( "$1:$2-$3", $4 );
177 # } elsif( /^\s*(\S+)\s+(\S+)\s*$/ox ) { without trailing numbers
179 elsif (/^\s*(\S+)\s+(\S+)\s*\d*\s*$/ox) { # with numbers
180 ( $seqname, $aln_line ) = ( $1, $2 );
181 if ( $seqname =~ /^[\*\.\+\:]+$/ ) {
182 $self->{_lastline} = $_;
183 next;
186 else {
187 $self->{_lastline} = $_;
188 next;
191 # we ended up the first block and now are on the second
192 @ids_copy = @ids unless(defined($ids_copy[0])); #FIXME - hacky
193 my $seqname_with_coords = shift(@ids_copy);
194 if ($seqname_with_coords !~ /$seqname/) {
196 $self->throw("header and body of the alignment dont match");
199 $alignments{$seqname_with_coords} .= $aln_line;
201 if ( !$seen_block ) {
202 if (exists $order{$seqname_with_coords}) {
203 $self->warn("Duplicate sequence : $seqname\n".
204 "Can't guarantee alignment quality");
206 else {
207 $order{$seqname_with_coords} = $order++;
213 my ( $sname, $start, $end );
214 foreach my $name ( sort { $order{$a} <=> $order{$b} } keys %alignments ) {
215 if ( $name =~ /(\S+):(\d+)-(\d+)/ ) {
216 ( $sname, $start, $end ) = ( $1, $2, $3 );
218 else {
219 ( $sname, $start ) = ( $name, 1 );
220 my $str = $alignments{$name};
221 $str =~ s/[^A-Za-z]//g;
222 $end = length($str);
224 my $seq = Bio::LocatableSeq->new(
225 -seq => $alignments{$name},
226 -id => $sname,
227 -start => $start,
228 -end => $end,
229 -alphabet => $self->alphabet,
231 $aln->add_seq($seq);
234 return $aln if $aln->num_sequences;
235 return;
238 =head2 write_aln
240 Title : write_aln
241 Usage : $stream->write_aln(@aln)
242 Function: writes the proda-format object (.aln) into the stream
243 Returns : 1 for success and 0 for error
244 Args : Bio::Align::AlignI object
246 =cut
248 sub write_aln {
249 my ($self,@aln) = @_;
250 $self->throw_not_implemented();
253 =head2 percentages
255 Title : percentages
256 Usage : $obj->percentages($newval)
257 Function: Set the percentages flag - whether or not to show percentages in
258 each output line
259 Returns : value of percentages
260 Args : newvalue (optional)
263 =cut
265 sub percentages {
266 my ( $self, $value ) = @_;
267 if ( defined $value ) {
268 $self->{'_percentages'} = $value;
270 return $self->{'_percentages'};
273 =head2 line_length
275 Title : line_length
276 Usage : $obj->line_length($newval)
277 Function: Set the alignment output line length
278 Returns : value of line_length
279 Args : newvalue (optional)
282 =cut
284 sub line_length {
285 my ( $self, $value ) = @_;
286 if ( defined $value ) {
287 $self->{'_line_length'} = $value;
289 return $self->{'_line_length'};
292 =head2 no_header
294 Title : no_header
295 Usage : $obj->no_header($newval)
296 Function: Set if the alignment input has a CLUSTAL header or not
297 Returns : value of no_header
298 Args : newvalue (optional)
301 =cut
303 sub no_header {
304 my ( $self, $value ) = @_;
305 if ( defined $value ) {
306 $self->{'_no_header'} = $value;
308 return $self->{'_no_header'};