merge upstream
[bioperl-live.git] / Bio / AlignIO / xmfa.pm
blob089d22720412775f3527b6641940e9c1faeeb20d
2 # BioPerl module for Bio::AlignIO::xmfa
4 # Copyright Chris Fields
6 # You may distribute this module under the same terms as perl itself
7 # POD documentation - main docs before the code
9 =head1 NAME
11 Bio::AlignIO::xmfa - XMFA MSA Sequence input/output stream
13 =head1 SYNOPSIS
15 Do not use this module directly. Use it via the L<Bio::AlignIO>
16 class.
18 =head1 DESCRIPTION
20 This object can transform L<Bio::SimpleAlign> objects from
21 XMFA flat file databases. For more information, see:
23 http://asap.ahabs.wisc.edu/mauve-aligner/mauve-user-guide/mauve-output-file-formats.html
25 This module is based on the AlignIO::fasta parser written by
26 Peter Schattner
28 =head1 TODO
30 Finish write_aln(), clean up code, allow LargeLocatableSeq (ie for
31 very large sequences a'la Mauve)
33 =head1 FEEDBACK
35 =head2 Support
37 Please direct usage questions or support issues to the mailing list:
39 I<bioperl-l@bioperl.org>
41 rather than to the module maintainer directly. Many experienced and
42 reponsive experts will be able look at the problem and quickly
43 address it. Please include a thorough description of the problem
44 with code and data examples if at all possible.
46 =head2 Reporting Bugs
48 Report bugs to the Bioperl bug tracking system to help us keep track
49 the bugs and their resolution. Bug reports can be submitted via the
50 web:
52 https://github.com/bioperl/bioperl-live/issues
54 =head1 AUTHORS
56 Chris Fields
58 =head1 APPENDIX
60 The rest of the documentation details each of the object
61 methods. Internal methods are usually preceded with a _
63 =cut
65 # Let the code begin...
67 package Bio::AlignIO::xmfa;
68 use strict;
70 use base qw(Bio::AlignIO);
71 our $WIDTH = 60;
73 =head2 next_aln
75 Title : next_aln
76 Usage : $aln = $stream->next_aln
77 Function: returns the next alignment in the stream.
78 Returns : Bio::Align::AlignI object - returns 0 on end of file
79 or on error
80 Args : -width => optional argument to specify the width sequence
81 will be written (60 chars by default)
83 See L<Bio::Align::AlignI>
85 =cut
87 sub next_aln {
88 my $self = shift;
89 my ($width) = $self->_rearrange([qw(WIDTH)],@_);
90 $self->width($width || $WIDTH);
92 my ($name, $tempname, $seqchar);
93 my $aln = Bio::SimpleAlign->new();
94 my $seqs = 0;
95 # alignments
96 while (defined (my $entry = $self->_readline) ) {
97 chomp $entry;
98 if ( index($entry, '=') == 0 ) {
99 if (defined $name && $seqchar) {
100 my $seq = $self->_process_seq($name, $seqchar);
101 $aln->add_seq($seq);
103 if ($aln && $entry =~ m{score\s*=\s*(\d+)}) {
104 $aln->score($1);
106 $seqchar = '';
107 undef $name;
108 last;
109 } elsif ( $entry =~ m{^>.+$}xms) {
110 if ( defined $name ) {
111 my $seq = $self->_process_seq($name, $seqchar);
112 $aln->add_seq($seq);
114 $seqchar = '';
115 $name = $entry;
116 } else {
117 $seqchar .= $entry;
121 # this catches last sequence if '=' is not present (Mauve)
122 if ( defined $name ) {
123 my $seq = $self->_process_seq($name, $seqchar);
124 $aln->add_seq($seq);
126 $aln->num_sequences ? return $aln : return;
129 =head2 write_aln
131 Title : write_aln
132 Usage : $stream->write_aln(@aln)
133 Function: writes the $aln object into the stream in xmfa format
134 Returns : 1 for success and 0 for error
135 Args : L<Bio::Align::AlignI> object
137 See L<Bio::Align::AlignI>
139 =cut
141 sub write_aln {
142 my ($self,@aln) = @_;
143 my $width = $self->width;
144 my ($seq,$desc,$rseq,$name,$count,$length,$seqsub,$start,$end,$strand,$id);
146 foreach my $aln (@aln) {
147 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
148 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
149 next;
151 #if( $self->force_displayname_flat ) {
152 # $aln->set_displayname_flat(1);
154 my $seqct = 1;
155 foreach $rseq ( $aln->each_seq() ) {
156 ($start, $end, $strand, $id) = ($rseq->start, $rseq->end, $rseq->strand || 0,
157 $rseq->display_id);
158 $strand = ($strand == 1) ? '+' :
159 ($strand == -1) ? '-' :
161 $name = sprintf("%d:%d-%d %s %s",$seqct,$start,$end,$strand,$id);
162 $seq = $rseq->seq();
163 $desc = $rseq->description || '';
164 $self->_print (">$name $desc\n") or return ;
165 $count = 0;
166 $length = length($seq);
167 if(defined $seq && $length > 0) {
168 $seq =~ s/(.{1,$width})/$1\n/g;
169 } else {
170 $seq = "\n";
172 $self->_print($seq) || return 0;
173 $seqct++;
175 my $alndesc = '';
176 $alndesc = "score = ".$aln->score if ($aln->score);
177 $self->_print("= $alndesc\n") || return 0;
180 $self->flush if $self->_flush_on_write && defined $self->_fh;
181 return 1;
184 =head2 _get_len
186 Title : _get_len
187 Usage :
188 Function: determine number of alphabetic chars
189 Returns : integer
190 Args : sequence string
192 =cut
194 sub _get_len {
195 my ($self,$seq) = @_;
196 $seq =~ s/[^A-Z]//gi;
197 return CORE::length($seq);
200 =head2 width
202 Title : width
203 Usage : $obj->width($newwidth)
204 $width = $obj->width;
205 Function: Get/set width of alignment
206 Returns : integer value of width
207 Args : on set, new value (a scalar or undef, optional)
210 =cut
212 sub width{
213 my $self = shift;
215 return $self->{'_width'} = shift if @_;
216 return $self->{'_width'} || $WIDTH;
219 ####### PRIVATE #######
221 sub _process_seq {
222 my ($self, $entry, $seq) = @_;
223 my ($start, $end, $strand, $seqname, $desc, $all);
224 # put away last name and sequence
225 if ( $entry =~ m{^>\s*\d+:(\d+)-(\d+)\s([+-]{1})(?:\s+(\S+)\s*(\S\.*)?)?} ) {
226 ($start, $end, $seqname, $desc) = ($1, $2, $4, $5);
227 $strand = ($3 eq '+') ? 1 : -1;
228 } else {
229 $self->throw("Line does not comform to XMFA format:\n$entry");
231 my $seqobj = Bio::LocatableSeq->new(
232 -nowarnonempty => 1,
233 -strand => $strand,
234 -seq => $seq,
235 -display_id => $seqname,
236 -description => $desc || $all,
237 -start => $start,
238 -end => $end,
239 -alphabet => $self->alphabet,
241 $self->debug("Reading $seqname\n");
242 return $seqobj;